Source code for cytoflow.views.table

#!/usr/bin/env python3.8
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflow.views.table
--------------------

"Plot" a tabular view of a statistic.

`TableView` -- the `IView` class that makes the plot.
"""

from warnings import warn
from traits.api import HasStrictTraits, Str, provides, Tuple, Constant
import matplotlib.pyplot as plt

from matplotlib.table import Table

import pandas as pd
import numpy as np

from .i_view import IView
import cytoflow.utility as util

@provides(IView)
class TableView(HasStrictTraits):
    """
    "Plot" a tabular view of a statistic.  Mostly useful for GUIs.  Each level
    of the statistic's index must be used in `row_facet`, `column_facet`,
    `subrow_facet`, or `subcolumn_facet`.  This module can't "plot" a
    statistic with more than four index levels unless `subset` is set and that
    results in extra levels being dropped.

    Attributes
    ----------
    statistic : (str, str)
        The name of the statistic to plot.  Must be a key in the
        `Experiment.statistics` attribute of the `Experiment` being plotted.
        Each level of the statistic's index must be used in `row_facet`,
        `column_facet`, `subrow_facet`, or `subcolumn_facet`.

    row_facet : str
        The statistic facet to be used as row headers.

    column_facet : str
        The statistic facet to be used as column headers.

    subrow_facet : str
        The statistic facet to be used as subrow headers.

    subcolumn_facet : str
        The statistic facet to be used as subcolumn headers.

    subset : str
        A Python expression used to select a subset of the statistic to plot.

    Examples
    --------

    Make a little data set.

    .. plot::
        :context: close-figs

        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()

    Add a threshold gate

    .. plot::
        :context: close-figs

        >>> ex2 = flow.ThresholdOp(name = 'Threshold',
        ...                        channel = 'Y2-A',
        ...                        threshold = 2000).apply(ex)

    Add a statistic

    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ['Dox', 'Threshold'],
        ...                               function = len).apply(ex2)

    "Plot" the table

    .. plot::
        :context: close-figs

        >>> flow.TableView(statistic = ("ByDox", "len"),
        ...                row_facet = "Dox",
        ...                column_facet = "Threshold").plot(ex3)

    """

    # traits
    id = Constant("edu.mit.synbio.cytoflow.view.table")
    friendly_id = Constant("Table View")

    statistic = Tuple(Str, Str)
    row_facet = Str
    subrow_facet = Str
    column_facet = Str
    subcolumn_facet = Str

    subset = Str

    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Draw the statistic as a matplotlib table on a new figure.

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic named by `statistic`.

        plot_name : object
            Not used by this view.

        **kwargs
            Passed through to the `matplotlib.table.Table` constructor.

        Raises
        ------
        CytoflowViewError
            If the experiment or statistic is missing, `subset` is invalid
            or selects nothing, or the facets don't match the statistic's
            index levels.
        """

        stat = self._lookup_statistic(experiment)

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat
        data = self._subset_and_squeeze(data)

        self._check_facet_layout()

        # each facet is checked against the experiment first, then against
        # the (possibly reduced) statistic index
        for trait, label, facet in self._facet_specs():
            if facet:
                self._check_condition(experiment, trait, label, facet)
                self._check_index_level(data, trait, label, facet)

        self._check_facet_coverage(data)

        layout = self._table_layout(data)
        col_groups, subcol_groups, col_offset = layout[2], layout[3], layout[5]
        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        fig = plt.figure()
        ax = fig.add_subplot(111)

        # hide the plot axes that matplotlib tries to make
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        for sp in ax.spines.values():
            sp.set_color('w')
            sp.set_zorder(0)

        loc = 'upper left'
        bbox = None

        t = Table(ax, loc, bbox, **kwargs)
        t.auto_set_font_size(False)
        for c in range(num_cols):
            t.auto_set_column_width(c)

        width = [0.2] * num_cols
        height = t._approx_text_height() * 1.8

        # make the main table
        for row_idx, col_idx, key in self._iter_cells(data, layout):
            value = data.loc[key, stat.name]
            try:
                text = "{:g}".format(value)
            except (TypeError, ValueError):
                # not a number; hand the raw value to matplotlib
                text = value

            t.add_cell(row_idx,
                       col_idx,
                       width = width[col_idx],
                       height = height,
                       text = text)

        # row, subrow, column and subcolumn headers
        for row_idx, col_idx, facet, value in self._iter_headers(layout):
            t.add_cell(row_idx,
                       col_idx,
                       width = width[col_idx],
                       height = height,
                       text = self._header_text(facet, value))

        ax.add_table(t)

    def export(self, experiment, filename):
        """
        Export the table to a file.

        Parameters
        ----------
        experiment : Experiment
            The experiment holding the statistic named by `statistic`.

        filename : object
            Passed to `numpy.savetxt`; the table is written comma-delimited.

        Raises
        ------
        CytoflowViewError
            If the experiment or statistic is missing, `subset` is invalid
            or selects nothing, or the facets don't match the statistic's
            index levels.
        """

        stat = self._lookup_statistic(experiment)

        for trait, label, facet in self._facet_specs():
            if facet:
                self._check_condition(experiment, trait, label, facet)

        data = pd.DataFrame(index = stat.index)
        data[stat.name] = stat

        self._export_data(data, stat.name, filename)

    def _export_data(self, data, column_name, filename):
        """
        Lay out column `column_name` of `data` as a table and write it to
        `filename` with `numpy.savetxt`.

        `data` itself is not modified.  Raises `CytoflowViewError` under the
        same subset / facet conditions as `plot`, except that facets are only
        checked against the index of `data`, not against an experiment.
        """

        data = self._subset_and_squeeze(data)

        self._check_facet_layout()

        for trait, label, facet in self._facet_specs():
            if facet:
                self._check_index_level(data, trait, label, facet)

        self._check_facet_coverage(data)

        layout = self._table_layout(data)
        (row_groups, subrow_groups,
         col_groups, subcol_groups,
         row_offset, col_offset) = layout

        num_rows = len(row_groups) * len(subrow_groups) + row_offset
        num_cols = len(col_groups) * len(subcol_groups) + col_offset

        t = np.empty((num_rows, num_cols), dtype = np.object_)

        # make the main table
        for row_idx, col_idx, key in self._iter_cells(data, layout):
            t[row_idx, col_idx] = data.loc[key, column_name]

        # row, subrow, column and subcolumn headers.  (the subcolumn headers
        # used to be written with the enclosing *column* value.)
        for row_idx, col_idx, facet, value in self._iter_headers(layout):
            t[row_idx, col_idx] = "{0} = {1}".format(facet, value)

        np.savetxt(filename, t, delimiter = ",", fmt = "%s")

    # ---- private helpers shared by plot() and export() ----

    def _facet_specs(self):
        """Return (trait name, message label, value) for the four facets, in validation order."""
        return (('row_facet', "Row", self.row_facet),
                ('subrow_facet', "Subrow", self.subrow_facet),
                ('column_facet', "Column", self.column_facet),
                ('subcolumn_facet', "Subcolumn", self.subcolumn_facet))

    def _lookup_statistic(self, experiment):
        """Return the statistic named by `statistic`, or raise if it (or the experiment) is missing."""
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")

        if self.statistic not in experiment.statistics:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(self.statistic))

        return experiment.statistics[self.statistic]

    def _subset_and_squeeze(self, data):
        """Apply `subset` to `data`, then drop every index level that has only one value."""
        if self.subset:
            try:
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' isn't valid"
                                             .format(self.subset)) from e

            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{0}' returned no values"
                                             .format(self.subset))
        else:
            # the index is reassigned below; work on a shallow copy so the
            # caller's frame keeps its own index
            data = data.copy(deep = False)

        for name in list(data.index.names):
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {}; dropping it.".format(name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # pandas raises ValueError when asked to drop the only
                    # remaining level; older versions had no droplevel on a
                    # flat Index at all (AttributeError)
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e

        return data

    def _check_facet_layout(self):
        """Check that a sub-facet is only set when its parent facet is set too."""
        if not (self.row_facet or self.column_facet):
            raise util.CytoflowViewError('row_facet',
                                         "Must set at least one of row_facet "
                                         "or column_facet")

        if self.subrow_facet and not self.row_facet:
            raise util.CytoflowViewError('subrow_facet',
                                         "Must set row_facet before using "
                                         "subrow_facet")

        if self.subcolumn_facet and not self.column_facet:
            raise util.CytoflowViewError('subcolumn_facet',
                                         "Must set column_facet before using "
                                         "subcolumn_facet")

    @staticmethod
    def _check_condition(experiment, trait, label, facet):
        """Raise unless `facet` is one of the experiment's conditions."""
        if facet not in experiment.conditions:
            raise util.CytoflowViewError(trait,
                                         "{} facet {} not in the experiment, "
                                         "must be one of {}"
                                         .format(label, facet, experiment.conditions))

    @staticmethod
    def _check_index_level(data, trait, label, facet):
        """Raise unless `facet` is one of the index level names of `data`."""
        if facet not in data.index.names:
            raise util.CytoflowViewError(trait,
                                         "{} facet {} not a statistic index; "
                                         "must be one of {}"
                                         .format(label, facet, data.index.names))

    def _check_facet_coverage(self, data):
        """Check that the facets are distinct and together use every index level of `data`."""
        facets = [x for x in [self.row_facet,
                              self.subrow_facet,
                              self.column_facet,
                              self.subcolumn_facet] if x]

        if len(facets) != len(set(facets)):
            raise util.CytoflowViewError(None,
                                         "Can't reuse facets")

        if set(facets) != set(data.index.names):
            raise util.CytoflowViewError(None,
                                         "Must use all the statistic indices as variables or facets: {}"
                                         .format(data.index.names))

    def _table_layout(self, data):
        """
        Return (row_groups, subrow_groups, col_groups, subcol_groups,
        row_offset, col_offset).  An unset facet contributes the single
        group [None]; the offsets count the header rows / columns that
        precede the data cells.
        """
        def groups(facet):
            if facet:
                return data.index.get_level_values(facet).unique()
            return [None]

        row_offset = (self.column_facet != "") + (self.subcolumn_facet != "")
        col_offset = (self.row_facet != "") + (self.subrow_facet != "")

        return (groups(self.row_facet),
                groups(self.subrow_facet),
                groups(self.column_facet),
                groups(self.subcolumn_facet),
                row_offset,
                col_offset)

    def _iter_cells(self, data, layout):
        """Yield (row_idx, col_idx, index key) for every data cell of the table."""
        (row_groups, subrow_groups,
         col_groups, subcol_groups,
         row_offset, col_offset) = layout

        names = list(data.index.names)

        for ri, r in enumerate(row_groups):
            for rri, rr in enumerate(subrow_groups):
                for ci, c in enumerate(col_groups):
                    for cci, cc in enumerate(subcol_groups):
                        row_idx = ri * len(subrow_groups) + rri + row_offset
                        col_idx = ci * len(subcol_groups) + cci + col_offset

                        # the key must list the level values in the order
                        # of the index levels, not in facet order
                        level_value = {self.row_facet : r,
                                       self.subrow_facet : rr,
                                       self.column_facet : c,
                                       self.subcolumn_facet : cc}
                        key = tuple(level_value[name]
                                    for name in names
                                    if name in level_value)

                        yield row_idx, col_idx, key

    def _iter_headers(self, layout):
        """
        Yield (row_idx, col_idx, facet name, facet value) for every header
        cell: row headers, then subrow, column and subcolumn headers.
        """
        (row_groups, subrow_groups,
         col_groups, subcol_groups,
         row_offset, col_offset) = layout

        # row headers go in column 0, on the first subrow of each row group
        if self.row_facet:
            for ri, r in enumerate(row_groups):
                yield (ri * len(subrow_groups) + row_offset,
                       0, self.row_facet, r)

        # subrow headers go in column 1
        if self.subrow_facet:
            for ri in range(len(row_groups)):
                for rri, rr in enumerate(subrow_groups):
                    yield (ri * len(subrow_groups) + rri + row_offset,
                           1, self.subrow_facet, rr)

        # column headers go in row 0, above the first subcolumn of each group
        if self.column_facet:
            for ci, c in enumerate(col_groups):
                yield (0, ci * len(subcol_groups) + col_offset,
                       self.column_facet, c)

        # subcolumn headers go in row 1
        if self.subcolumn_facet:
            for ci in range(len(col_groups)):
                for cci, cc in enumerate(subcol_groups):
                    yield (1, ci * len(subcol_groups) + cci + col_offset,
                           self.subcolumn_facet, cc)

    @staticmethod
    def _header_text(facet, value):
        """Format a plot header as "facet = value", using ``:g`` when the value accepts it."""
        try:
            return "{0} = {1:g}".format(facet, value)
        except (TypeError, ValueError):
            # str raises ValueError for 'g'; other objects (e.g. None)
            # raise TypeError
            return "{0} = {1}".format(facet, value)