Source code for cytoflow.utility.log_scale

#!/usr/bin/env python3.8
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflow.utility.log_scale
--------------------------

A scale that transforms data using a base-10 log.

`LogScale` -- implements `IScale`, the `cytoflow` interface for the scale.
"""

import numpy as np
import pandas as pd
import matplotlib.colors

from traits.api import (Instance, Str, provides, Constant, Enum, Float, 
                        Property, List, Array) 

from .scale import IScale, ScaleMixin, register_scale
from .cytoflow_errors import CytoflowError


[docs]
@provides(IScale)
class LogScale(ScaleMixin):
    id = Constant("cytoflow.utility.log_scale")
    name = "log"
    
    experiment = Instance("cytoflow.Experiment")
    
    # must set one of these.  they're considered in order.
    channel = Str
    condition = Str
    statistic = Str
    features = List(Str)
    data = Array

    mode = Enum("mask", "clip")
    threshold = Property(Float, depends_on = "[experiment, condition, channel, statistic, error_statistic]")
    _channel_threshold = Float(0.001)


[docs]
    def get_mpl_params(self, ax):
        """
        Returns a dict with the traits needed to initialize an instance of
        `matplotlib.scale.ScaleBase`
        """
        return {"nonpositive" : self.mode}

        
    def _set_threshold(self, threshold):
        self._channel_threshold = threshold
        
    def _get_threshold(self):
        if self.channel:
            return self._channel_threshold
        elif self.condition:
            cond = self.experiment[self.condition][self.experiment[self.condition] > 0]
            return cond.min()
        elif self.statistic in self.experiment.statistics:
            stat = self.experiment.statistics[self.statistic][self.features]
            return stat[stat > 0].min().min()
        elif self.data.size > 0:
            return self.data[self.data > 0].min()
                
        
    def __call__(self, data):
        """
        Transforms `data` using this scale.
        
        Careful!  May return `NaN` if the scale domain doesn't match the data 
        (ie, applying a log10 scale to negative numbers.)
        """
        
        # this function should work with: int, float, tuple, list, pd.Series, 
        # np.ndframe.  it should return the same data type as it was passed.
        
        if isinstance(data, (int, float)):
            if self.mode == "mask":
                if data < self.threshold:
                    raise CytoflowError("data <= scale.threshold (currently: {})".format(self.threshold))
                else:
                    return np.log10(data)
            else:
                if data < self.threshold:
                    return np.log10(self.threshold)
                else:
                    return np.log10(data)
        elif isinstance(data, (list, tuple)):
            ret = [self.__call__(x) for x in data]
            if isinstance(data, tuple):
                return tuple(ret)
            else:
                return ret
        elif isinstance(data, (np.ndarray, pd.Series)):
            mask_value = np.nan if self.mode == "mask" else self.threshold
            x = pd.Series(data)
            x = x.mask(x < self.threshold, other = mask_value)
            ret = np.log10(x)
            
            if isinstance(data, pd.Series):
                return ret
            else:
                return ret.values
        else:
            raise CytoflowError("Unknown type {} passed to log_scale.__call__"
                                .format(type(data)))
                        

[docs]
    def inverse(self, data):
        """
        Transforms 'data' using the inverse of this scale.
        """
        
        # this function should work with: int, float, tuple, list, pd.Series, 
        # np.ndframe
        if isinstance(data, (int, float)):
            return np.power(10, data)
        elif isinstance(data, (list, tuple)):
            ret = [np.power(10, x) for x in data]
            if isinstance(data, tuple):
                return tuple(ret)
            else:
                return ret
        elif isinstance(data, (np.ndarray, pd.Series)):
            return np.power(10, data)
        else:
            raise CytoflowError("Unknown type {} passed to log_scale.inverse"
                                .format(type(data)))

    

[docs]
    def clip(self, data):
        """
        Clips data to the range of the scale function
        """
        
        if isinstance(data, pd.Series):            
            return data.clip(lower = self.threshold)
        elif isinstance(data, np.ndarray):
            return data.clip(min = self.threshold)
        elif isinstance(data, float):
            return max(data, self.threshold)
        elif isinstance(data, int):
            data = float(data)
            return max(data, self.threshold)
        else:
            try:
                return [max(x, self.threshold) for x in data]
            except TypeError as e:
                raise CytoflowError("Unknown data type in LogScale.clip") from e

            

[docs]
    def norm(self, vmin = None, vmax = None):
        """
        A factory function that returns `matplotlib.colors.Normalize` instance,
        which normalizes values for a `matplotlib` color palette.
        """
        
        if vmin is not None and vmax is not None:
            pass
        elif self.channel:
            vmin = self.experiment[self.channel].min()
            vmax = self.experiment[self.channel].max()

        elif self.condition:
            vmin = self.experiment[self.condition].min()
            vmax = self.experiment[self.condition].max()
                
        elif self.statistic in self.experiment.statistics:
            stat = self.experiment.statistics[self.statistic][self.features]
            vmin = stat.min().min()
            vmax = stat.max().max()

        elif self.data.size > 0:
            vmin = self.data.min()
            vmax = self.data.max()
        else:
            raise CytoflowError("Must set one of 'channel', 'condition' "
                                "or 'statistic'.")
        
        return matplotlib.colors.LogNorm(vmin = self.clip(vmin), vmax = self.clip(vmax))



register_scale(LogScale)