Source code for cytoflow.views.kde_1d

#!/usr/bin/env python3.8
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflow.views.kde_1d
---------------------

A one-channel kernel density estimate (like a smoothed histogram).

`Kde1DView` -- the `IView` class that makes the plot.
"""

from traits.api import provides, Constant
import matplotlib.pyplot as plt

import numpy as np
from sklearn.neighbors import KernelDensity
from statsmodels.nonparametric.bandwidths import bw_scott, bw_silverman

import cytoflow.utility as util

from .i_view import IView
from .base_views import Base1DView

[docs]@provides(IView)
class Kde1DView(Base1DView):
    """
    Plots a one-channel kernel density estimate, which is like a smoothed
    histogram.
    
    Attributes
    ----------
        
    Examples
    --------
    
    Make a little data set.
    
    .. plot::
        :context: close-figs
            
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
        
    Plot a histogram
    
    .. plot::
        :context: close-figs
    
        >>> flow.Kde1DView(channel = 'Y2-A',
        ...                scale = 'log',
        ...                huefacet = 'Dox').plot(ex)
    """
    
    # traits   
    id = Constant("edu.mit.synbio.cytoflow.view.kde1d")
    friendly_id = Constant("1D Kernel Density") 
    
[docs]    def plot(self, experiment, **kwargs):
        """
        Plot a smoothed histogram view of a channel
        
        Parameters
        ----------
        shade : bool
            If `True` (the default), shade the area under the plot.
            
        alpha : float, >=0 and <= 1
            The transparency of the shading.  1 is opaque, 0 is transparent.
            Default = 0.25
            
        kernel : str
            The kernel to use for the kernel density estimate. Choices are:
            
                - ``gaussian`` (the default)
                - ``tophat``
                - ``epanechnikov``
                - ``exponential``
                - ``linear``
                - ``cosine``
                
        bw : str or float
            The bandwidth for the kernel, controls how lumpy or smooth the
            kernel estimate is.  Choices are:
            
                - ``scott`` (the default) - ``1.059 * A * nobs ** (-1/5.)``, where ``A`` is ``min(std(X),IQR/1.34)``
                - ``silverman`` - ``.9 * A * nobs ** (-1/5.)``, where ``A`` is ``min(std(X),IQR/1.34)``
                
            If a float is given, it is the bandwidth.   Note, this is in 
            scaled units, not data units.
            
        gridsize : int (default = 100)
            How many times to compute the kernel? 

        Notes
        -----
        Other ``kwargs`` are passed to `matplotlib.pyplot.plot <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.plot.html>`_
  
        
        """
        if kwargs.get('orientation', 'vertical') == 'vertical':
            kwargs.setdefault('xlabel', self.channel)
            kwargs.setdefault('ylabel', 'Density')
        else:  # flip axis labels
            kwargs.setdefault('xlabel', 'Density')
            kwargs.setdefault('ylabel', self.channel)
        super().plot(experiment, **kwargs)
                
    def _grid_plot(self, experiment, grid, **kwargs):

        kwargs.setdefault('shade', True)
        kwargs.setdefault('orientation', "vertical")
        
        scale = kwargs.pop('scale')[self.channel]
        lim = kwargs.pop('lim')[self.channel]
                  
        grid.map(_univariate_kdeplot, self.channel, scale = scale, **kwargs)
        
        ret = {}
        if kwargs['orientation'] == 'vertical':
            ret['xscale'] = scale
            ret['xlim'] = lim
        else:
            ret['yscale'] = scale
            ret['ylim'] = lim
            
        return ret


# yoinked from seaborn/distributions.py, with modifications for scaling.

def _univariate_kdeplot(data, scale=None, shade=False, kernel="gaussian",
        bw="scott", gridsize=100, cut=3, clip=None, legend=True,
        ax=None, orientation = "vertical", **kwargs):
    
    if ax is None:
        ax = plt.gca()
        
    if clip is None:
        clip = (-np.inf, np.inf)

    scaled_data = scale(data)
    
    # mask out the data that's not in the scale domain
    scaled_data = scaled_data[~np.isnan(scaled_data)]  
    
    if kernel not in ['gaussian','tophat','epanechnikov','exponential','linear','cosine']:
        raise util.CytoflowOpError(None,
                                   "kernel must be one of ['gaussian'|'tophat'|'epanechnikov'|'exponential'|'linear'|'cosine']")
    
    if bw == 'scott':
        bw = bw_scott(scaled_data)
    elif bw == 'silverman':
        bw = bw_silverman(scaled_data)
    elif not isinstance(bw, float):
        raise util.CytoflowViewError(None,
                                     "Bandwith must be 'scott', 'silverman' or a float")
    
    support = _kde_support(scaled_data, bw, gridsize, cut, clip)[:, np.newaxis]

    kde = KernelDensity(kernel = kernel, bandwidth = bw).fit(scaled_data.to_numpy()[:, np.newaxis])
    log_density = kde.score_samples(support)

    x = scale.inverse(support[:, 0])
    y = np.exp(log_density)

    # Check if a label was specified in the call
    label = kwargs.pop("label", None)
    color = kwargs.pop("color", None)
    alpha = kwargs.pop("alpha", 0.25)

    # Draw the KDE plot and, optionally, shade
    if orientation == "vertical":
        ax.plot(x, y, color=color, label=label, **kwargs)
        if shade:
            ax.fill_between(x, 1e-12, y, facecolor=color, alpha=alpha)
    else:
        ax.plot(y, x, color=color, label=label, **kwargs)
        if shade:
            ax.fill_between(y, 1e-12, x, facecolor=color, alpha=alpha)

    return ax

def _kde_support(data, bw, gridsize, cut, clip):
    """Establish support for a kernel density estimate."""
    support_min = max(data.min() - bw * cut, clip[0])
    support_max = min(data.max() + bw * cut, clip[1])
    return np.linspace(support_min, support_max, gridsize)

util.expand_class_attributes(Kde1DView)
util.expand_method_parameters(Kde1DView, Kde1DView.plot)