Source code for cytoflow.views.table

#!/usr/bin/env python3.4
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2021
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

'''
cytoflow.views.table
--------------------
'''

from warnings import warn
from traits.api import HasStrictTraits, Str, provides, Tuple, Constant
import matplotlib.pyplot as plt

from matplotlib.table import Table

import pandas as pd
import numpy as np

from .i_view import IView
import cytoflow.utility as util

[docs]@provides(IView)
class TableView(HasStrictTraits):
    """
    "Plot" a tabular view of a statistic.  Mostly useful for GUIs.  Each level 
    of the statistic's index must be used in :attr:`row_facet`, 
    :attr:`column_facet`, :attr:`subrow_facet`, or :attr:`subcolumn_facet`.
    This module can't "plot" a statistic with more than four index levels
    unless :attr:`subset` is set and that results in extra levels being 
    dropped.
    
    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the  
        :attr:`~Experiment.statistics` attribute of the :class:`~.Experiment`
        being plotted.  Each level of the statistic's index must be used 
        in :attr:`row_facet`, :attr:`column_facet`, :attr:`subrow_facet`, or
        :attr:`subcolumn_facet`.
        
    row_facet, column_facet : str
        The statistic facets to be used as row and column headers.
        
    subrow_facet, subcolumn_facet : str
        The statistic facets to be used as subrow and subcolumn headers.
        
    subset : str
        A Python expression used to select a subset of the statistic to plot.
        
    Examples
    --------
    
    Make a little data set.
    
    .. plot::
        :context: close-figs
            
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
        
    Add a threshold gate
    
    .. plot::
        :context: close-figs
    
        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)
        
    Add a statistic
    
    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2) 
    
    "Plot" the table
    
    .. plot::
        :context: close-figs
        
        >>> flow.TableView(statistic = ("ByDox", "len"),
        ...                row_facet = "Dox",
        ...                column_facet = "Threshold").plot(ex3)
        
    """

    # traits   
    id = Constant("edu.mit.synbio.cytoflow.view.table")
    friendly_id = Constant("Table View") 

    REMOVED_ERROR = Constant("Statistics have changed dramatically in 0.5; please see the documentation")
    channel = util.Removed(err_string = REMOVED_ERROR)
    function = util.Removed(err_string = REMOVED_ERROR)
    
    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str
    
    subset = Str

[docs]    def plot(self, experiment, plot_name = None, **kwargs):
        """Plot a table"""
        
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")   
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic', 
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        else:
            stat = experiment.statistics[self.statistic]    
            
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat   
        
        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e
                
            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no values"
                                             .format(self.subset))
            
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError as e:
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e
        
        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError('row_facet',
                                         "Must set at least one of row_facet "
                                         "or column_facet")
            
        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError('subrow_facet',
                                         "Must set row_facet before using "
                                         "subrow_facet")
            
        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Must set column_facet before using "
                                         "subcolumn_facet")
            
        if self.row_facet and self.row_facet not in experiment.conditions:
            raise util.CytoflowViewError('row_facet',
                                         "Row facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.row_facet, experiment.conditions))        

        if self.row_facet and self.row_facet not in data.index.names:
            raise util.CytoflowViewError('row_facet',
                                         "Row facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.row_facet, data.index.names))  
            
        if self.subrow_facet and self.subrow_facet not in experiment.conditions:
            raise util.CytoflowViewError('subrow_facet',
                                         "Subrow facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.subrow_facet, experiment.conditions))  
            
        if self.subrow_facet and self.subrow_facet not in data.index.names:
            raise util.CytoflowViewError('subrow_facet',
                                         "Subrow facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.subrow_facet, data.index.names))  
            
        if self.column_facet and self.column_facet not in experiment.conditions:
            raise util.CytoflowViewError('column_facet',
                                         "Column facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.column_facet, experiment.conditions))  
            
        if self.column_facet and self.column_facet not in data.index.names:
            raise util.CytoflowViewError('column_facet',
                                         "Column facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.column_facet, data.index.names)) 
            
        if self.subcolumn_facet and self.subcolumn_facet not in experiment.conditions:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Subcolumn facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.subcolumn_facet, experiment.conditions))  
            
        if self.subcolumn_facet and self.subcolumn_facet not in data.index.names:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Subcolumn facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.subcolumn_facet, data.index.names))  

        facets = [x for x in [self.row_facet, self.subrow_facet, 
                                      self.column_facet, self.subcolumn_facet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None, 
                                         "Can't reuse facets")
        
        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(None,
                                         "Must use all the statistic indices as variables or facets: {}"
                                         .format(data.index.names))
            
        row_groups = data.index.get_level_values(self.row_facet).unique() \
                     if self.row_facet else [None]
                     
        subrow_groups = data.index.get_level_values(self.subrow_facet).unique() \
                        if self.subrow_facet else [None] 
        
        col_groups = data.index.get_level_values(self.column_facet).unique() \
                     if self.column_facet else [None]
                     
        subcol_groups = data.index.get_level_values(self.subcolumn_facet).unique() \
                        if self.subcolumn_facet else [None]

        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")        
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")
        
        num_cols = len(col_groups) * len(subcol_groups) + col_offset
        
        fig = plt.figure()
        ax = fig.add_subplot(111)
        
        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)
        
        loc = 'upper left'
        bbox = None
        
        t = Table(ax, loc, bbox, **kwargs)
        t.auto_set_font_size(False)
        for c in range(num_cols):
            t.auto_set_column_width(c)

        width = [0.2] * num_cols

        height = t._approx_text_height() * 1.8
         
        # make the main table       
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        row_idx = ri * len(subrow_groups) + rri + row_offset
                        col_idx = ci * len(subcol_groups) + cci + col_offset
                        
                        # this is not pythonic, but i'm tired
                        agg_idx = []
                        for data_idx in data.index.names:
                            if data_idx == self.row_facet:
                                agg_idx.append(r)
                            elif data_idx == self.subrow_facet:
                                agg_idx.append(rr)
                            elif data_idx == self.column_facet:
                                agg_idx.append(c)
                            elif data_idx == self.subcolumn_facet:
                                agg_idx.append(cc)
                        
                        agg_idx = tuple(agg_idx)

                        try:
                            text = "{:g}".format(data.loc[agg_idx, stat.name])
                        except (TypeError, ValueError):
                            text = data.loc[agg_idx, stat.name]
                            
                        t.add_cell(row_idx, 
                                   col_idx,
                                   width = width[col_idx],
                                   height = height,
                                   text = text)
                        
        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                try:
                    text = "{0} = {1:g}".format(self.row_facet, r)
                except ValueError:
                    text = "{0} = {1}".format(self.row_facet, r)
                t.add_cell(row_idx,
                           0,
                           width = width[0],
                           height = height,
                           text = text)
                
        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    try:
                        text = "{0} = {1:g}".format(self.subrow_facet, rr)
                    except ValueError:
                        text = "{0} = {1}".format(self.subrow_facet, rr)
                        
                    t.add_cell(row_idx,
                               1,
                               width = width[1],
                               height = height,
                               text = text)
                    
        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                try:
                    text = "{0} = {1:g}".format(self.column_facet, c)
                except ValueError:
                    text = "{0} = {1}".format(self.column_facet, c)
                t.add_cell(0,
                           col_idx,
                           width = width[col_idx],
                           height = height,
                           text = text)

        # subcolumn headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    try:
                        text = "{0} = {1:g}".format(self.subcolumn_facet, cc)
                    except ValueError:
                        text = "{0} = {1}".format(self.subcolumn_facet, cc)
                    t.add_cell(1,
                               col_idx,
                               width = width[col_idx],
                               height = height,
                               text = text)                
                        
        ax.add_table(t)
        
[docs]    def export(self, experiment, filename):
        """
        Export the table to a file.
        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")   
        
        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic', 
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))
        else:
            stat = experiment.statistics[self.statistic]    
            
            
        if self.row_facet and self.row_facet not in experiment.conditions:
            raise util.CytoflowViewError('row_facet',
                                         "Row facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.row_facet, experiment.conditions))       
            
        if self.subrow_facet and self.subrow_facet not in experiment.conditions:
            raise util.CytoflowViewError('subrow_facet',
                                         "Subrow facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.subrow_facet, experiment.conditions))  
            
        if self.column_facet and self.column_facet not in experiment.conditions:
            raise util.CytoflowViewError('column_facet',
                                         "Column facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.column_facet, experiment.conditions))
            
        if self.subcolumn_facet and self.subcolumn_facet not in experiment.conditions:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Subcolumn facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(self.subcolumn_facet, experiment.conditions)) 
            
        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat   
        
        self._export_data(data, stat.name, filename)
    
    def _export_data(self, data, column_name, filename):
        
        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e
                
            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no values"
                                             .format(self.subset))
            
        names = list(data.index.names)
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except AttributeError as e:
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e
        
        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError('row_facet',
                                         "Must set at least one of row_facet "
                                         "or column_facet")
            
        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError('subrow_facet',
                                         "Must set row_facet before using "
                                         "subrow_facet")
            
        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Must set column_facet before using "
                                         "subcolumn_facet")  

        if self.row_facet and self.row_facet not in data.index.names:
            raise util.CytoflowViewError('row_facet',
                                         "Row facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.row_facet, data.index.names))  
 
            
        if self.subrow_facet and self.subrow_facet not in data.index.names:
            raise util.CytoflowViewError('subrow_facet',
                                         "Subrow facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.subrow_facet, data.index.names))  
 
            
        if self.column_facet and self.column_facet not in data.index.names:
            raise util.CytoflowViewError('column_facet',
                                         "Column facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.column_facet, data.index.names)) 
            

            
        if self.subcolumn_facet and self.subcolumn_facet not in data.index.names:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Subcolumn facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(self.subcolumn_facet, data.index.names))  

        facets = [x for x in [self.row_facet, self.subrow_facet, 
                                      self.column_facet, self.subcolumn_facet] if x]
        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None, 
                                         "Can't reuse facets")
        
        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(None,
                                         "Must use all the statistic indices as variables or facets: {}"
                                         .format(data.index.names))
            
        row_groups = data.index.get_level_values(self.row_facet).unique() \
                     if self.row_facet else [None]
                     
        subrow_groups = data.index.get_level_values(self.subrow_facet).unique() \
                        if self.subrow_facet else [None] 
        
        col_groups = data.index.get_level_values(self.column_facet).unique() \
                     if self.column_facet else [None]
                     
        subcol_groups = data.index.get_level_values(self.subcolumn_facet).unique() \
                        if self.subcolumn_facet else [None]

        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")        
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")
        
        num_rows = len(row_groups) * len(subrow_groups) + row_offset
        num_cols = len(col_groups) * len(subcol_groups) + col_offset
        
        t = np.empty((num_rows, num_cols), dtype = np.object_)
        
        # make the main table       
        for (ri, r) in enumerate(row_groups):
            for (rri, rr) in enumerate(subrow_groups):
                for (ci, c) in enumerate(col_groups):
                    for (cci, cc) in enumerate(subcol_groups):
                        row_idx = ri * len(subrow_groups) + rri + row_offset
                        col_idx = ci * len(subcol_groups) + cci + col_offset
                        
                        # this is not pythonic, but i'm tired
                        agg_idx = []
                        for data_idx in data.index.names:
                            if data_idx == self.row_facet:
                                agg_idx.append(r)
                            elif data_idx == self.subrow_facet:
                                agg_idx.append(rr)
                            elif data_idx == self.column_facet:
                                agg_idx.append(c)
                            elif data_idx == self.subcolumn_facet:
                                agg_idx.append(cc)
                        
                        agg_idx = tuple(agg_idx)                            
                        t[row_idx, col_idx] = data.loc[agg_idx, column_name]
                        
        # row headers
        if self.row_facet:
            for (ri, r) in enumerate(row_groups):
                row_idx = ri * len(subrow_groups) + row_offset
                text = "{0} = {1}".format(self.row_facet, r)
                t[row_idx, 0] = text
                
        # subrow headers
        if self.subrow_facet:
            for (ri, r) in enumerate(row_groups):
                for (rri, rr) in enumerate(subrow_groups):
                    row_idx = ri * len(subrow_groups) + rri + row_offset
                    text = "{0} = {1}".format(self.subrow_facet, rr)
                    t[row_idx, 1] = text
                    
        # column headers
        if self.column_facet:
            for (ci, c) in enumerate(col_groups):
                col_idx = ci * len(subcol_groups) + col_offset
                text = "{0} = {1}".format(self.column_facet, c)
                t[0, col_idx] = text

        # column headers
        if self.subcolumn_facet:
            for (ci, c) in enumerate(col_groups):
                for (cci, cc) in enumerate(subcol_groups):
                    col_idx = ci * len(subcol_groups) + cci + col_offset
                    text = "{0} = {1}".format(self.subcolumn_facet, c)
                    t[1, col_idx] = text        
                    
        np.savetxt(filename, t, delimiter = ",", fmt = "%s")