# Source code for bimato.utils

# Copyright (C) 2022 Tony Fischer
#
# This file is part of Bio Matrix Topology (BiMaTo).
#
# Bio Matrix Topology (BiMaTo) is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bio Matrix Topology (BiMaTo) is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bio Matrix Topology (BiMaTo).  If not, see <http://www.gnu.org/licenses/>.

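"""
Utility helpers for BiMaTo.

Contains functions to read image stacks from readlif image objects, to normalize and TV-denoise image data,
to calculate a weighted median of a DataFrame column and to split an image volume into equally sized parts.
"""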


__author__ = "Tony Fischer (tku137)"
__copyright__ = "Copyright 2022, Tony Fischer (tku137)"
__license__ = "GPLv3"
__email__ = "tonyfischer@mailbox.org"
__status__ = "Development"
__version__ = "2022.2.1"
__credits__ = ["Tony Fischer (tku137)", "Alexander Hayn"]


import numpy as np
from skimage.restoration import denoise_tv_chambolle


def read_lif_image(lif_image, t=0, c=0):
    """
    Read one z-stack from a readlif image object and return it as a 3D numpy array.

    Parameters
    ----------
    lif_image : object
        Image object that provides ``info['dims']`` (with ``x``, ``y`` and ``z`` attributes) and
        ``get_iter_z(t=..., c=...)``; presumably a ``readlif.reader.LifImage`` - TODO confirm
    t : int, optional
        Time point that is passed on to ``get_iter_z``, defaults to 0
    c : int, optional
        Channel that is passed on to ``get_iter_z``, defaults to 0

    Returns
    -------
    numpy.ndarray
        A 3D float64 numpy array of shape ``(dims.y, dims.x, dims.z)``, one image plane per z index.

    """
    dims = lif_image.info['dims']

    # np.zeros defaults to float64, each plane is cast on assignment
    data = np.zeros((dims.y, dims.x, dims.z))
    for z, im_plane in enumerate(lif_image.get_iter_z(t=t, c=c)):
        data[..., z] = np.array(im_plane)

    return data


def normalize_image(image, scale):
    """
    Standard function to min-max normalize image data values to the range [0, scale].

    Parameters
    ----------
    image : numpy.ndarray
        Input image data
    scale : int or float
        Scaling factor, e.g. 1 would scale to [0, 1] and 255 to the value range of 8bit integer matrices

    Returns
    -------
    numpy.ndarray
        Normalized image data. An image in which all values are equal is returned as all zeros.

    """
    im_min = image.min()
    value_range = image.max() - im_min

    # a constant image would result in 0 / 0, i.e. an array full of NaN
    if value_range == 0:
        return np.zeros(image.shape, dtype=float)

    return scale * ((image - im_min) / value_range)


def get_weight_factor(size):
    """
    Calculates the weight factor for TV-denoising using scikit-image. Values have been determined empirically.

    Notes
    -----
    Formulas of fitting empirical data (x is ``size``):

    - mean values (used here): 3E-08x^2 + 8E-06x + 0.0037

    - more denoising: 3E-08x^2 + 6E-06x + 0.0127

    - linear fit was: 6E-05x - 0.0215

    Parameters
    ----------
    size: int
        Relevant size of image data

    Returns
    -------
    float
        TV-denoising weight factor
    """
    quadratic_term = 3e-8 * (size ** 2)
    linear_term = 8e-6 * size
    return quadratic_term + linear_term + 0.0037


# TODO: inplace possible?
def denoise_image(image, denoise_weight=None):
    """
    Apply TV-denoising on input image data. Denoising weight factor is determined automatically, except explicitly
    specified.

    Parameters
    ----------
    image : numpy.ndarray
        Input raw image data, either a single 2D plane or a 3D stack with z as last axis
    denoise_weight : int or float, optional
        Weight factor for scikit image TV-denoising. ``None`` and ``0`` both select the automatic weight factor
        that is calculated from the smaller edge of the first two image dimensions.

    Returns
    -------
    numpy.ndarray
        Denoised image data as uint8 array with the same shape as the input

    Notes
    -----
    The result of the denoising is multiplied by 255 and cast to uint8, which assumes denoised values in [0, 1].
    NOTE(review): scikit-image presumably only rescales integer input to [0, 1] and leaves float input as is, so
    float images should already be normalized to [0, 1] - confirm against callers.

    """
    # get weight factor, truthiness check is kept on purpose so that a weight of 0 falls back to the automatic one
    if denoise_weight:
        weight = denoise_weight
    else:
        # size of the smaller dim of an x-y image
        weight = get_weight_factor(min(image.shape[:2]))

    # preallocate image array
    image_denoised = np.zeros_like(image, dtype=np.uint8)

    # a single plane has no z axis to iterate over
    if image.ndim == 2:
        image_denoised[...] = denoise_tv_chambolle(image, weight=weight) * 255
        return image_denoised

    # denoise per-plane for better results
    for z in range(image.shape[2]):
        # rescale to uint8 [0,255] range, values are cast on assignment
        image_denoised[:, :, z] = denoise_tv_chambolle(image[:, :, z], weight=weight) * 255

    return image_denoised


def calc_weighted_median(df, data_col, weight_col):
    """
    Calculates the weighted median of specified DataFrame column with respect to another specified column.

    The rows are sorted by ``data_col`` and the first value is returned at which the cumulative sum of
    ``weight_col`` reaches at least half of the total weight.

    Parameters
    ----------
    df: pandas.DataFrame
        Input DataFrame
    data_col: str
        Name of column from which the median is calculated
    weight_col: str
        Name of column that is used as weight

    Returns
    -------
    scalar
        Single value of ``data_col`` that is the weighted median (not a Series)

    Raises
    ------
    IndexError
        If no row reaches half of the total weight, e.g. for an empty DataFrame

    """
    sorted_df = df.sort_values(data_col)
    weights = sorted_df[weight_col].values

    cumulative_weight = weights.cumsum()
    half_weight = weights.sum() / 2.0

    # boolean mask keeps all rows at or above half of the total weight, the first one is the median
    return sorted_df[data_col][cumulative_weight >= half_weight].iloc[0]


def get_voxel_volume(sampling):
    """
    Calculates the volume of a single voxel as product of the sampling in all three dimensions.

    Parameters
    ----------
    sampling : dict
        Sampling per dimension, has to contain the keys 'x', 'y' and 'z'

    Returns
    -------
    int or float
        Product of the three sampling values

    """
    return sampling['x'] * sampling['y'] * sampling['z']


# construct list of intervals as numpy array
def get_interval_list(part_size, number_parts):
    """
    Constructs a list of consecutive, equally sized intervals starting at 0.

    Parameters
    ----------
    part_size : int or float
        Length of a single interval
    number_parts : int
        Number of intervals to construct

    Returns
    -------
    list of numpy.ndarray
        One array ``[start, stop]`` per part, with ``start = n * part_size`` and ``stop = (n + 1) * part_size``.
        Empty list if ``number_parts`` is 0.

    """
    unit_interval = np.array([0, 1])
    return [(unit_interval + n) * part_size for n in range(number_parts)]


def get_intervals(data_shape, sampling, part_size_micron):
    """
    Splits an image volume into equally sized parts and returns the pixel intervals of these parts per dimension.

    The number of parts per dimension is calculated from the physical size of the volume and
    ``part_size_micron``, the size of a part in pixels is then the floored quotient of the dimension size and
    the number of parts. Pixels that remain after this flooring are not covered by any interval.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the image data, the first three entries are used
    sampling : dict
        Sampling per dimension with keys 'x', 'y' and 'z'
    part_size_micron : int or float
        Desired edge length of a part, in the same physical unit as ``sampling`` (presumably micron)

    Returns
    -------
    tuple of list of numpy.ndarray
        Interval lists ``(intervals_x_px, intervals_y_px, intervals_z_px)``, each interval is an array
        ``[start, stop]`` in pixels

    """
    # only depends on the part_size, the part numbers are calculated based on micron sizes
    number_parts = get_possible_part_numbers(data_shape, sampling, part_size_micron)
    part_sizes_px = get_part_size_px(data_shape, *number_parts)

    intervals_x_px, intervals_y_px, intervals_z_px = (
        get_interval_list(size_px, parts) for size_px, parts in zip(part_sizes_px, number_parts)
    )

    return intervals_x_px, intervals_y_px, intervals_z_px


def get_cube_size_in_micron(data_shape, sampling):
    """
    Calculates the physical edge lengths of an image volume from its shape and sampling.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the image data, the first three entries are used
    sampling : dict
        Sampling per dimension with keys 'x', 'y' and 'z' (presumably micron per pixel)

    Returns
    -------
    tuple of float
        Edge lengths ``(sx, sy, sz)``

    Notes
    -----
    Axis 0 of ``data_shape`` is paired with ``sampling['x']`` and axis 1 with ``sampling['y']``.
    NOTE(review): ``read_lif_image`` returns arrays of shape (y, x, z) - confirm that this pairing is intended
    for images with different x and y sampling.

    """
    sx = int(data_shape[0]) * float(sampling['x'])
    sy = int(data_shape[1]) * float(sampling['y'])
    sz = int(data_shape[2]) * float(sampling['z'])
    return sx, sy, sz


def get_possible_part_numbers(data_shape, sampling, part_size):
    """
    Calculates how many parts of a given physical size completely fit into an image volume per dimension.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the image data, the first three entries are used
    sampling : dict
        Sampling per dimension with keys 'x', 'y' and 'z'
    part_size : int or float
        Edge length of a part in the same physical unit as the cube size (presumably micron), must not be 0

    Returns
    -------
    tuple of numpy integer
        Number of parts ``(nx, ny, nz)``, an entry is 0 if the volume is smaller than ``part_size`` in that
        dimension

    """
    cube_size = get_cube_size_in_micron(data_shape, sampling)

    # floor so that only parts that fit completely are counted
    nx, ny, nz = (np.floor(edge / part_size).astype('int') for edge in cube_size)
    return nx, ny, nz


def get_part_size_px(data_shape, nx, ny, nz):
    """
    Calculates the size of a part in pixels per dimension for a given number of parts.

    Parameters
    ----------
    data_shape : tuple of int
        Shape of the image data, the first three entries are used
    nx : int
        Number of parts along axis 0
    ny : int
        Number of parts along axis 1
    nz : int
        Number of parts along axis 2

    Returns
    -------
    tuple of numpy integer
        Part sizes ``(px, py, pz)`` in pixels, floored to whole pixels. The part size of a dimension with 0 parts
        is 0.

    """
    def axis_part_size(length, parts):
        # no part fits into this dimension: dividing by 0 would give inf and an undefined integer cast
        if parts == 0:
            return np.dtype('int').type(0)
        return np.floor(int(length) / parts).astype('int')

    px = axis_part_size(data_shape[0], nx)
    py = axis_part_size(data_shape[1], ny)
    pz = axis_part_size(data_shape[2], nz)
    return px, py, pz