from copy import deepcopy
import numpy as np
from .imprt import preset_import
from .log import get_logger
logger = get_logger()
[docs]def normalize_uint(arr):
r"""Normalizes the input ``uint`` array such that its ``dtype`` maximum
becomes :math:`1`.
Args:
arr (numpy.ndarray): Input array of type ``uint``.
Returns:
numpy.ndarray: Normalized array of type ``float``.
"""
if arr.dtype not in (np.uint8, np.uint16):
raise TypeError(arr.dtype)
maxv = np.iinfo(arr.dtype).max
arr_ = arr.astype(float)
arr_ = arr_ / maxv
return arr_
[docs]def denormalize_float(arr, uint_type='uint8'):
r"""De-normalizes the input ``float`` array such that :math:`1` becomes
the target ``uint`` maximum.
Args:
arr (numpy.ndarray): Input array of type ``float``.
uint_type (str, optional): Target ``uint`` type.
Returns:
numpy.ndarray: De-normalized array of the target type.
"""
_assert_float_0to1(arr)
if uint_type not in ('uint8', 'uint16'):
raise TypeError(uint_type)
maxv = np.iinfo(uint_type).max
arr_ = arr * maxv
arr_ = arr_.astype(uint_type)
return arr_
[docs]def alpha_blend(arr1, alpha, arr2=None):
r"""Alpha-blends two arrays, or masks one array.
Args:
arr1 (numpy.ndarray): Input array.
alpha (numpy.ndarray): Alpha map whose values are :math:`\in [0,1]`.
arr2 (numpy.ndarray): Input array. If ``None``, ``arr1`` will be
blended with an all-zero array, equivalent to masking ``arr1``.
Returns:
numpy.ndarray: Blended array of type ``float``.
"""
arr1 = arr1.astype(float)
if arr2 is None:
arr2 = np.zeros(arr1.shape, dtype=arr1.dtype)
if alpha.shape != arr1.shape:
if alpha.ndim == 2 and arr1.ndim == 3:
alpha = np.dstack([alpha] * arr1.shape[2])
elif alpha.ndim == 3 and alpha.shape[2] == 1 and arr1.ndim == 3:
alpha = np.tile(alpha, (1, 1, arr1.shape[2]))
else:
raise NotImplementedError(
"{arr_s} and {alpha_s}".format(
alpha_s=alpha.shape, arr_s=arr1.shape))
blend = np.multiply(arr1, alpha) + np.multiply(arr2, 1 - alpha)
return blend
[docs]def resize(arr, new_h=None, new_w=None, method='cv2'):
"""Resizes an image, with the option of maintaining the aspect ratio.
Args:
arr (numpy.ndarray): Image to binarize. If multiple-channel, each
channel is resized independently.
new_h (int, optional): Target height. If ``None``, will be calculated
according to the target width, assuming the same aspect ratio.
new_w (int, optional): Target width. If ``None``, will be calculated
according to the target height, assuming the same aspect ratio.
method (str, optional): Accepted values: ``'cv2'``, ``'tf'``, and
``'pil'``.
Returns:
numpy.ndarray: Resized image.
"""
h, w = arr.shape[:2]
if new_h is not None and new_w is not None:
if int(h / w * new_w) != new_h:
logger.warning((
"Aspect ratio changed in resizing: original size is %s; "
"new size is %s"), (h, w), (new_h, new_w))
elif new_h is None and new_w is not None:
new_h = int(h / w * new_w)
elif new_h is not None and new_w is None:
new_w = int(w / h * new_h)
else:
raise ValueError("At least one of new height or width must be given")
method = method.lower()
if method in ('tf', 'tensorflow'):
tf = preset_import('tensorflow', assert_success=True)
tf.compat.v1.enable_eager_execution()
tensor = tf.convert_to_tensor(arr)
tensor_resized = tf.image.resize(
tensor, (new_h, new_w), method='bilinear', antialias=True)
resized = tensor_resized.numpy()
elif method in ('cv', 'cv2', 'opencv'):
cv2 = preset_import('cv2', assert_success=True)
interp = cv2.INTER_LINEAR if new_h > h else cv2.INTER_AREA
resized = cv2.resize(arr, (new_w, new_h), interpolation=interp)
elif method in ('pil', 'pillow'):
Image = preset_import('Image', assert_success=True)
img = Image.fromarray(arr)
img = img.resize((new_w, new_h))
resized = np.array(img)
else:
raise NotImplementedError(method)
return resized
[docs]def binarize(im, threshold=None):
"""Binarizes images.
Args:
im (numpy.ndarray): Image to binarize. Of any integer type (``uint8``,
``uint16``, etc.). If H-by-W-by-3, will be converted to grayscale
and treated as H-by-W.
threshold (float, optional): Threshold for binarization. ``None``
means midpoint of the ``dtype``.
Returns:
numpy.ndarray: Binarized image. Of only 0's and 1's.
"""
im_copy = deepcopy(im)
# RGB to grayscale
if im_copy.ndim == 3 and im_copy.shape[2] == 3: # h-by-w-by-3
cv2 = preset_import('cv2', assert_success=True)
im_copy = cv2.cvtColor(im_copy, cv2.COLOR_BGR2GRAY)
if im_copy.ndim == 2: # h-by-w
# Compute threshold from data type
if threshold is None:
maxval = np.iinfo(im_copy.dtype).max
threshold = maxval / 2.
im_bin = im_copy
logicalmap = im_copy > threshold
im_bin[logicalmap] = 1
im_bin[np.logical_not(logicalmap)] = 0
else:
raise ValueError("'im' is neither h-by-w nor h-by-w-by-3")
return im_bin
[docs]def remove_islands(im, min_n_pixels, connectivity=4):
"""Removes small islands of pixels from a binary image.
Args:
im (numpy.ndarray): Input binary image. Of only 0's and 1's.
min_n_pixels (int): Minimum island size to keep.
connectivity (int, optional): Definition of "connected": either 4 or 8.
Returns:
numpy.ndarray: Output image with small islands removed.
"""
cv2 = preset_import('cv2', assert_success=True)
# Validate inputs
assert (len(im.shape) == 2), \
"'im' needs to have exactly two dimensions"
assert np.array_equal(np.unique(im), np.array([0, 1])), \
"'im' needs to contain only 0's and 1's"
assert connectivity in (4, 8), \
"'connectivity' must be either 4 or 8"
# Find islands, big or small
nlabels, labelmap, leftx_topy_bbw_bbh_npix, _ = \
cv2.connectedComponentsWithStats(im, connectivity)
# Figure out background is 0 or 1
bgval = im[labelmap == 0][0]
# Set small islands to background value
im_clean = im
for i in range(1, nlabels): # skip the 0th island -- background
island_size = leftx_topy_bbw_bbh_npix[i, -1]
if island_size < min_n_pixels:
im_clean[labelmap == i] = bgval
return im_clean
[docs]def grid_query_img(im, query_x, query_y, method='bilinear'):
r"""Grid queries an image via interpolation.
If you want to grid query unstructured data, consider
:func:`grid_query_unstruct`.
This function uses either bilinear interpolation that allows you to break
big matrices into patches and work locally, or bivariate spline
interpolation that fits a global spline (so memory-intensive) and shows
global effects.
Args:
im (numpy.ndarray): H-by-W or H-by-W-by-C rectangular grid of data.
Each of C channels is interpolated independently.
query_x (array_like): :math:`x` coordinates of the queried rectangle,
e.g., ``np.arange(10)`` for a 10-by-10 grid (hence, this should
*not* be generated by :func:`numpy.meshgrid` or similar
functions).
query_y (array_like): :math:`y` coordinates, following this
convention:
.. code-block:: none
+---------> query_x
|
|
|
v query_y
method (str, optional): Interpolation method: ``'spline'`` or
``'bilinear'``.
Returns:
numpy.ndarray: Interpolated values at query locations, of shape
``(len(query_y), len(query_x))`` for single-channel input or
``(len(query_y), len(query_x), im.shape[2])`` for multi-channel
input.
"""
from scipy.interpolate import RectBivariateSpline, interp2d
# Figure out image size and number of channels
if im.ndim == 3:
h, w, c = im.shape
if c == 1: # single dimension
im = im[:, :, 0]
elif im.ndim == 2:
h, w = im.shape
c = 1
else:
raise ValueError("'im' must have either two or three dimensions")
x = np.arange(w)
y = np.arange(h)
if query_x.min() < 0 or query_x.max() > w - 1 or \
query_y.min() < 0 or query_y.max() > h - 1:
logger.warning("Sure you want to query points outside 'im'?")
def query(x, y, z, qx, qy, method):
if method == 'spline':
# TODO: test whether we need to swap x and y
spline_obj = RectBivariateSpline(y, x, z)
qz = spline_obj(qy, qx, grid=True)
elif method == 'bilinear':
f = interp2d(x, y, z, kind='linear')
qz = f(qx, qy)
else:
raise NotImplementedError("Other interplation methods")
return qz
if c == 1:
# Single channel
z = im
logger.info("Interpolation (method: %s) started", method)
interp_val = query(x, y, z, query_x, query_y, method)
logger.info("... done")
else:
# Multiple channels
interp_val = np.zeros((len(query_x), len(query_y), c))
for i in range(c):
z = im[:, :, i]
logger.info(
"Interpolation (method: %s) started for channel %d/%d",
method, i + 1, c)
interp_val[:, :, i] = query(x, y, z, query_x, query_y, method)
logger.info("... done")
return interp_val
[docs]def grid_query_unstruct(uvs, values, grid_res, method=None):
r"""Grid queries unstructured data given by coordinates and their values.
If you are looking to grid query structured data, such as an image, check
out :func:`grid_query_img`.
This function interpolates values on a rectangular grid given some sparse,
unstrucured samples. One use case is where you have some UV locations and
their associated colors, and you want to "paint the colors" on a UV canvas.
Args:
uvs (numpy.ndarray): N-by-2 array of UV coordinates where we have
values (e.g., colors). See
:func:`xiuminglib.blender.object.smart_uv_unwrap` for the UV
coordinate convention.
values (numpy.ndarray): N-by-M array of M-D values at the N UV
locations, or N-array of scalar values at the N UV locations.
Channels are interpolated independently.
grid_res (array_like): Resolution (height first; then width) of
the query grid.
method (dict, optional): Dictionary of method-specific parameters.
Implemented methods and their default parameters:
.. code-block:: python
# Default
method = {
'func': 'griddata',
# Which SciPy function to call.
'func_underlying': 'linear',
# Fed to `griddata` as the `method` parameter.
'fill_value': (0,), # black
# Will be used to fill in pixels outside the convex hulls
# formed by the UV locations, and if `max_l1_interp` is
# provided, also the pixels whose interpolation is too much
# of a stretch to be trusted. In the context of "canvas
# painting," this will be the canvas' base color.
'max_l1_interp': np.inf, # trust/accept all interpolations
# Maximum L1 distance, which we can trust in interpolation,
# to pixels that have values. Interpolation across a longer
# range will not be trusted, and hence will be filled with
# `fill_value`.
}
.. code-block:: python
method = {
'func': 'rbf',
# Which SciPy function to call.
'func_underlying': 'linear',
# Fed to `Rbf` as the `method` parameter.
'smooth': 0, # no smoothing
# Fed to `Rbf` as the `smooth` parameter.
}
Returns:
numpy.ndarray: Interpolated values at query locations, of shape
``grid_res`` for single-channel input or ``(grid_res[0], grid_res[1],
values.shape[2])`` for multi-channel input.
"""
if values.ndim == 1:
values = values.reshape(-1, 1)
assert values.ndim == 2 and values.shape[0] == uvs.shape[0]
if method is None:
method = {'func': 'griddata'}
h, w = grid_res
# Generate query coordinates
grid_x, grid_y = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
# +---> x
# |
# v y
grid_u, grid_v = grid_x, 1 - grid_y
# ^ v
# |
# +---> u
if method['func'] == 'griddata':
from scipy.interpolate import griddata
cv2 = preset_import('cv2', assert_success=True)
func_underlying = method.get('func_underlying', 'linear')
fill_value = method.get('fill_value', (0,))
max_l1_interp = method.get('max_l1_interp', np.inf)
fill_value = np.array(fill_value)
if len(fill_value) == 1:
fill_value = np.tile(fill_value, values.shape[1])
assert len(fill_value) == values.shape[1]
if max_l1_interp is None:
max_l1_interp = np.inf # trust everything
# Figure out which pixels can be trusted
has_value = np.zeros((h, w), dtype=np.uint8)
ri = ((1 - uvs[:, 1]) * (h - 1)).astype(int).ravel()
ci = (uvs[:, 0] * (w - 1)).astype(int).ravel()
in_canvas = np.logical_and.reduce(
(ri >= 0, ri < h, ci >= 0, ci < w)) # to ignore out-of-canvas points
has_value[ri[in_canvas], ci[in_canvas]] = 1
dist2val = cv2.distanceTransform(1 - has_value, cv2.DIST_L1, 3)
trusted = dist2val <= max_l1_interp
# Process each color channel separately
interps = []
for ch_i in range(values.shape[1]):
v_fill = fill_value[ch_i]
v = values[:, ch_i]
interp = griddata(uvs, v, (grid_u, grid_v),
method=func_underlying,
fill_value=v_fill)
interp[~trusted] = v_fill
interps.append(interp)
interps = np.dstack(interps)
elif method['func'] == 'rbf':
from scipy.interpolate import Rbf
func_underlying = method.get('func_underlying', 'linear')
smooth = method.get('smooth', 0)
# Process each color channel separately
interps = []
for ch_i in range(values.shape[1]):
v = values[:, ch_i]
rbfi = Rbf(uvs[:, 0], uvs[:, 1], v,
function=func_underlying,
smooth=smooth)
interp = rbfi(grid_u, grid_v)
interps.append(interp)
interps = np.dstack(interps)
else:
raise NotImplementedError(method['func'])
if interps.shape[2] == 1:
return interps[:, :, 0].squeeze()
return interps
[docs]def find_local_extrema(im, want_maxima, kernel_size=3):
"""Finds local maxima or minima in an image.
Args:
im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale)
or H-by-W-by-C for multi-channel (e.g., RGB) images. Extrema
are found independently for each of the C channels.
want_maxima (bool): Whether maxima or minima are wanted.
kernel_size (int, optional): Side length of the square window under
consideration. Must be larger than 1.
Returns:
numpy.ndarray: Binary map indicating if each pixel is a local extremum.
"""
from scipy.ndimage.filters import minimum_filter, maximum_filter
logger.error("find_local_extrema() not tested yet!")
# Figure out image size and number of channels
if im.ndim == 3:
h, w, c = im.shape
expanded = False
elif im.ndim == 2:
h, w = im.shape
c = 1
im = np.expand_dims(im, axis=2) # adds singleton dimension
expanded = True
else:
raise ValueError("'im' must have either two or three dimensions")
kernel = np.ones((kernel_size, kernel_size)).astype(bool)
is_extremum = np.zeros((h, w, c), dtype=bool)
for i in range(c):
z = im[:, :, i]
if want_maxima:
equals_extremum = maximum_filter(z, footprint=kernel) == z
else:
equals_extremum = minimum_filter(z, footprint=kernel) == z
is_extremum[:, :, i] = equals_extremum
if expanded:
is_extremum = is_extremum[:, :, 0]
return is_extremum
[docs]def compute_gradients(im):
"""Computes magnitudes and orientations of image gradients.
With Scharr operators:
.. code-block:: none
[ 3 0 -3 ] [ 3 10 3]
[10 0 -10] and [ 0 0 0]
[ 3 0 -3 ] [-3 -10 -3]
Args:
im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale) or
H-by-W-by-C if multi-channel (e.g., RGB) images. Gradients are
computed independently for each of the C channels.
Returns:
tuple:
- **grad_mag** (*numpy.ndarray*) -- Magnitude image of the
gradients.
- **grad_orient** (*numpy.ndarray*) -- Orientation image of the
gradients (in radians).
.. code-block:: none
y ^ pi/2
|
pi |
--------+--------> 0
-pi | x
| -pi/2
"""
cv2 = preset_import('cv2', assert_success=True)
# Figure out image size and number of channels
if im.ndim == 3:
h, w, c = im.shape
expanded = False
elif im.ndim == 2:
h, w = im.shape
c = 1
im = np.expand_dims(im, axis=2) # adds singleton dimension
expanded = True
else:
raise ValueError("'im' must have either two or three dimensions")
grad_mag = np.zeros((h, w, c))
grad_orient = np.zeros((h, w, c))
for i in range(c):
z = im[:, :, i]
ddepth = -1 # same depth as the source
# Along horizontal direction
xorder, yorder = 1, 0
grad_h = cv2.Sobel(z, ddepth, xorder, yorder, ksize=-1) # 3x3 Scharr
grad_h = grad_h.astype(float)
# Along vertical direction
xorder, yorder = 0, 1
grad_v = cv2.Sobel(z, ddepth, xorder, yorder, ksize=-1) # 3x3 Scharr
grad_v = grad_v.astype(float)
# Magnitude
grad_mag[:, :, i] = np.sqrt(np.square(grad_h) + np.square(grad_v))
# Orientation
grad_orient[:, :, i] = np.arctan2(grad_v, grad_h)
if expanded:
grad_mag = grad_mag[:, :, 0]
grad_orient = grad_orient[:, :, 0]
return grad_mag, grad_orient
[docs]def gamma_correct(im, gamma=2.2):
r"""Applies gamma correction to an ``uint`` image.
Args:
im (numpy.ndarray): H-by-W if single-channel (e.g., grayscale) or
H-by-W-by-C multi-channel (e.g., RGB) ``uint`` images.
gamma (float, optional): Gamma value :math:`< 1` shifts image towards
the darker end of the spectrum, while value :math:`> 1` towards
the brighter.
Returns:
numpy.ndarray: Gamma-corrected image.
"""
cv2 = preset_import('cv2', assert_success=True)
assert im.dtype in ('uint8', 'uint16')
# Don't correct alpha channel, if exists
alpha = None
if im.ndim == 3 and im.shape[2] == 4:
alpha = im[:, :, 3]
im = im[:, :, :3]
# Correct with lookup table
type_max = np.iinfo(im.dtype).max
table = np.array([
((x / type_max) ** (1 / gamma)) * type_max
for x in np.arange(0, type_max + 1)
]).astype(im.dtype)
im_corrected = cv2.LUT(im, table)
# Concat alpha channel back
if alpha is not None:
im_corrected = np.dstack((im_corrected, alpha))
return im_corrected
[docs]def rgb2lum(im):
"""Converts RGB to relative luminance (if input is linear RGB) or luma
(if input is gamma-corrected RGB).
Args:
im (numpy.ndarray): RGB array of shape ``(..., 3)``.
Returns:
numpy.ndarray: Relative luminance or luma array.
"""
assert im.shape[-1] == 3, "Input's last dimension must hold RGB"
lum = 0.2126 * im[..., 0] + 0.7152 * im[..., 1] + 0.0722 * im[..., 2]
return lum
def _assert_float_0to1(arr):
if arr.dtype.kind != 'f':
raise TypeError("Input must be float (is %s)" % arr.dtype)
if (arr < 0).any() or (arr > 1).any():
raise ValueError("Input image has pixels outside [0, 1]")
def _assert_3ch(arr):
if arr.ndim != 3:
raise ValueError("Input image is not even 3D (H-by-W-by-3)")
n_ch = arr.shape[2]
if n_ch != 3:
raise ValueError("Input image must have 3 channels, but has %d" % n_ch)
srgb_linear_thres = 0.0031308
srgb_linear_coeff = 12.92
srgb_exponential_coeff = 1.055
srgb_exponent = 2.4
[docs]def linear2srgb(im, clip=False):
r"""Converts an image from linear RGB values to sRGB.
Args:
im (numpy.ndarray): Of type ``float``, and all pixels must be
:math:`\in [0, 1]`.
clip (bool, optional): Whether to clip values to :math:`[0,1]`.
Defaults to ``False``.
Returns:
numpy.ndarray: Converted image in sRGB.
"""
if clip:
im = np.clip(im, 0, 1)
_assert_float_0to1(im)
im_ = deepcopy(im)
# Guaranteed to be [0, 1] floats
linear_ind = im_ <= srgb_linear_thres
nonlinear_ind = im_ > srgb_linear_thres
im_[linear_ind] = im_[linear_ind] * srgb_linear_coeff
im_[nonlinear_ind] = srgb_exponential_coeff * (
np.power(im_[nonlinear_ind], 1 / srgb_exponent)
) - (srgb_exponential_coeff - 1)
return im_
[docs]def srgb2linear(im, clip=False):
r"""Converts an image from sRGB values to linear RGB.
Args:
im (numpy.ndarray): Of type ``float``, and all pixels must be
:math:`\in [0, 1]`.
clip (bool, optional): Whether to clip values to :math:`[0,1]`.
Defaults to ``False``.
Returns:
numpy.ndarray: Converted image in linear RGB.
"""
if clip:
im = np.clip(im, 0, 1)
_assert_float_0to1(im)
im_ = deepcopy(im)
# Guaranteed to be [0, 1] floats
gamma = (
(im_ + srgb_exponential_coeff - 1) / srgb_exponential_coeff
) ** srgb_exponent
scale = im_ / srgb_linear_coeff
im_ = np.where(im_ > srgb_linear_thres * srgb_linear_coeff, gamma, scale)
return im_
[docs]def tonemap(hdr, method='gamma', gamma=2.2):
r"""Tonemaps an HDR image.
Args:
hdr (numpy.ndarray): HDR image.
method (str, optional): Values accepted: ``'gamma'`` and ``'reinhard'``.
gamma (float, optional): Gamma value used if method is ``'gamma'``.
Returns:
numpy.ndarray: Tonemapped image :math:`\in [0, 1]`.
"""
if method == 'reinhard':
cv2 = preset_import('cv2', assert_success=True)
tonemapper = cv2.createTonemapReinhard(1, 1, 0, 0)
img = tonemapper.process(hdr)
elif method == 'gamma':
img = (hdr / hdr.max()) ** (1 / gamma)
else:
raise ValueError(method)
# Clip, if necessary, to guard against numerical errors
minv, maxv = img.min(), img.max()
if minv < 0:
logger.warning("Clipping negative values (min.: %f)", minv)
img = np.clip(img, 0, np.inf)
if maxv > 1:
logger.warning("Clipping >1 values (max.: %f)", maxv)
img = np.clip(img, -np.inf, 1)
return img