Source code for cytoflow.views.mst

#!/usr/bin/env python3.11
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2025
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflow.views.mst
------------------

Plots a minimum spanning tree of a statistic. Particularly useful for
visualizing the results of clustering operations such as `KMeansOp`
and `SOMOp`. 

`MSTView` -- plots the minimum spanning tree.
"""

import math
from warnings import warn
from natsort import natsorted

from traits.api import HasStrictTraits, provides, Enum, Str, Callable, Constant, \
                       List, Float, Int
import seaborn as sns
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.spatial.distance
import igraph
import numpy as np

import cytoflow
import cytoflow.utility as util

from .i_view import IView



[docs]
@provides(IView)
class MSTView(HasStrictTraits):
    """
    A view that creates a minimum spanning tree view of a statistic. 
    
    Set `statistic` to the name of the statistic to plot; set `feature` to the name
    of that statistic's feature you'd like to analyze. Then, set `locations` to
    *another* statistic whose features are the locations (in any number of dimensions)
    of the nodes in the tree -- usually these are cluster centroids from `KMeansOp`
    or `SOMOp` (see the example below). The view computes a minimum-spanning tree 
    containing the nodes and lays it out in two dimensions.
    
    There are three different ways of plotting the value at each location in the tree: 
    
    * Setting `style` to ``heat`` (the default) will produce an MST with a circle 
      at each vertex and the color of the circle is related to the intensity of 
      the value of `feature`. (In this scenario, `variable` is ignored.)
      
    * Setting `style` to ``pie`` will draw a pie plot at each location. If 
      `variable` is set, then the values of `variable` are used as the categories
      of the pie, and the arc length of each slice of pie is related 
      to the intensity of the value of `feature`. If `variable` is unset, then
      `feature` is ignored and the *features* of the statistic are used as the
      categories.
      
    * Setting `style` to ``petal`` will draw a "petal plot" in each cell. If 
      `variable` is set, then the values of `variable` are used as the categories, 
      but unlike a pie plot, the arc width of each slice is equal. Instead, the 
      radius of the pie slice scales with the square root of the intensity, so 
      that the relationship between area and intensity remains the same. If 
      `variable` is unset, then `feature` is ignored and the *features* of the
      statistic are used as the categories.
      
    .. warning::
        If `style` is ``pie`` or ``petal``, then negative data will be clipped
        to 0! 
      
    Optionally, you can set `size_function` to scale the circles (or pies or petals)
    by a function computed on `Experiment.data`. (Often set to ``len`` to scale 
    by the number of events in each cluster.)
    
    .. note::
       If you'd like to *select* events based on this view (by drawing a 
       polygon around the nodes of the tree), you can do that with
       `SOMOp`.
    
    Attributes
    ----------
        
    statistic : Str
        The statistic to plot. Must be a key in `Experiment.statistics`.
        
    locations : Str
        A statistic whose levels are the same as `statistic` and whose features
        are the dimensions of the locations of each node to plot.
        
    .. note:: If `style` is ``heat``, then the levels of `statistic` must be the
              same as the levels of `locations`. If `style` is ``pie`` or ``petal``,
              the levels of `statistic` must be the levels of `locations` plus `variable`.
        
    locations_level : Str
        Which level in the `locations` statistic is different at each location? 
        The values of the others must be specified in the ``plot_name`` parameter 
        of `plot`.  Optional if there is only one level in `locations`.
        
    locations_features : List(Str)
        Which features in `locations` to use. By default, use all of them. 
        
    .. warning::
        The `KMeansOp` statistic is mostly locations, but also has a 
        **Proportion** feature. You likely don't want to use it as a location 
        for laying out the minimum spanning tree!
            
    variable : Str
        The variable used for plotting pie and petal plots. Must be left empty
        for a heatmap.
        
    feature : Str
        The column in the statistic to plot (often a channel name.)
        
    style : Enum(``heat``, ``pie``, ``petal``) (default = ``heat``)
        What kind of plot to make?
        
    scale : {'linear', 'log', 'logicle'}
        For a heat map, how should the color of `feature` be scaled before 
        plotting? For pie and petal maps, how should the input data be normalized
        to [0,1] before plotting?
                
    size_function : Callable (default: None)
        If set, separate the `Experiment` into subsets by levels of `locations`, 
        compute a function on them, and scale the size of each tree node by 
        those values. The callable should take a single `pandas.DataFrame` 
        argument and return a *positive* ``float`` or value that can be cast to
        ``float`` (such as ``int``).  Of particular use is ``len``, which will
        scale the cells by the number of events in each subset.
        
    metric : Str (default: ``euclidean``)
        What metric should be used to compute distance in the tree? Must be one
        of ``braycurtis``, ``canberra``, ``chebyshev``, ``cityblock``, ``correlation``, 
        ``cosine``, ``dice``, ``euclidean``, ``hamming``, ``jaccard``, ``jensenshannon``, 
        ``kulczynski1``, ``mahalanobis``, ``matching``, ``minkowski``, ``rogerstanimoto``, 
        ``russellrao``, ``seuclidean``, ``sokalmichener``, ``sokalsneath``, ``sqeuclidean``, 
        ``yule``. Suggestion: use ``euclidean`` for small numbers of dimensions 
        (location features) and ``cosine`` for larger numbers.
        
    subset : str
        An expression that specifies the subset of the statistic to plot.
        Passed unmodified to `pandas.DataFrame.query`.
        
    Note
    ----
    `MSTView` is *not* a subclass of `BaseView` or any of its descendants.
    It implements the `IView` interface but it does not use 
    `seaborn.FacetGrid` for laying out its plots.
    

    Examples
    --------
    
    Make a little data set.
    
    .. plot::
        :context: close-figs
            
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
        ...                              conditions = {'Dox' : 10.0}),
        ...                    flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
        ...                              conditions = {'Dox' : 1.0})]
        >>> import_op.conditions = {'Dox' : 'float'}
        >>> ex = import_op.apply()
        
    Compute some KMeans clusters
    
    .. plot::
        :context: close-figs
        
        >>> km = flow.KMeansOp(name = "KMeans",
        ...                    channels = ["V2-A", "Y2-A", "B1-A"],
        ...                    scale = {"V2-A" : "logicle",
        ...                             "Y2-A" : "logicle",
        ...                             "B1-A" : "logicle"},
        ...                    num_clusters = 20)       
        >>> km.estimate(ex)
        >>> ex2 = km.apply(ex)
        
    Add a statistic
    
    .. plot::
        :context: close-figs

        >>> ex3 = flow.ChannelStatisticOp(name = "ByDox",
        ...                               channel = "Y2-A",
        ...                               by = ["KMeans", "Dox"],
        ...                               function = len).apply(ex2) 
    
    Plot the minimum spanning tree
    
    .. plot::
        :context: close-figs
        
        >>> flow.MSTView(statistic = "ByDox", 
        ...              locations = "KMeans", 
        ...              locations_features = ["V2-A", "Y2-A", "B1-A"],
        ...              feature = "Y2-A",
        ...              variable = "Dox",
        ...              style = "pie").plot(ex3)
    
    """
    
    # identifiers for this view: a fixed id string and a human-readable name
    id = Constant("cytoflow.view.mst")
    friendly_id = Constant("Minimum Spanning Tree") 
    
    # user-facing configuration; see the "Attributes" section of the class
    # docstring for the meaning of each trait
    statistic = Str
    locations = Str
    locations_level = Str
    locations_features = List(Str)
    variable = Str
    feature = Str
    scale = util.ScaleEnum
    style = Enum("heat", "pie", "petal")
    metric = Str("euclidean")   # passed to scipy.spatial.distance.cdist in plot()
    subset = Str
    
    size_function = Callable
    
    # bits to support the interactive selector for MSTOp
    # (all three are written by plot(): the name of the level that varies
    # between locations, the 2D layout coordinates of the tree's vertices,
    # and the location-level group keys found in the statistic)
    _loc_level = Str
    _vertices = List(List(Float))
    _groups = List(Int)
    

[docs]
    def plot(self, experiment, plot_name = None, **kwargs):
        """
        Plot a minimum spanning tree of the locations, drawing the values of
        the statistic at each node as a circle, pie or petal plot.
        
        Parameters
        ----------
        experiment: Experiment
            The `Experiment` to plot using this view.
            
        plot_name : str
            If this `IView` can make multiple plots, ``plot_name`` is
            the name of the plot to make.  Must be one of the values retrieved
            from `enum_plots`.
            
        title : str
            Set the plot title
            
        legend : bool
            Plot a legend or color bar?  Defaults to `True`.
            
        legend_loc : str
            If we plot a legend, where should it go? This is a ``matplotlib``
            legend location string, like 'lower right' or 'outside center right'.
            Default is 'upper right'. (Not used for the color bar of a heat map.)
            
        legendlabel : str
            Set the label for the color bar or legend
 
        palette : palette name
            Colors to use for the different levels of the hue variable. 
            Should be something that can be interpreted by
            `seaborn.color_palette`. If plotting a heat map, this should be
            a continuous color map ('viridis' is the default.) Otherwise,
            choose either a discrete color map ('deep' is the default) or
            a continuous color map from which equi-spaced colors will be drawn.
            
        radius : float
            The radius of the circle or pie plots, on a scale from 0 to 1.
            Defaults to 0.4 times the length of the first edge of the laid-out
            tree.
            
        norm : callable
            If given, used to normalize the data instead of the normalizer
            derived from `scale`.
            
        sns_style, sns_context : str
            Only honored when running in the GUI; otherwise they are dropped
            with a warning.
            
        All other parameters are passed to the `matplotlib.patches.Circle` or
        `matplotlib.patches.Wedge` constructors (ie, they should be patch attributes).

        Raises
        ------
        CytoflowViewError
            If the experiment is missing, the view's traits are inconsistent 
            with the statistics, ``plot_name`` is missing or invalid, 
            `size_function` returns unusable values, ``radius`` is out of 
            range, or a discrete palette is given for a heat map.

        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")
        
        if self.style == "heat" and self.variable != "":
            raise util.CytoflowViewError("variable",
                                         "If `style` is \"heat\", `variable` must be empty!")
        
        stat = self._get_stat(experiment)
        locs = self._get_locs(experiment)
        
        stat = self._subset_data(self.statistic, stat)
        locs = self._subset_data(self.locations, locs)
        experiment_data = experiment.data
        
        # figure out which index level distinguishes one location from another
        if locs.index.nlevels > 1:
            if not self.locations_level:
                raise util.CytoflowViewError('locations_level',
                                             "If 'locations' has more than one level, "
                                             "'locations_level' must be set!")

            if self.locations_level not in locs.index.names:
                raise util.CytoflowViewError('locations_level',
                                             "'locations_level' must be a level in 'locations'")
                
            if self.locations_level not in stat.index.names:
                raise util.CytoflowViewError('locations_level',
                                             "'locations_level' must be a level in 'statistic'")
            
            locations_level = self.locations_level
        else:
            locations_level = locs.index.names[0]
            
            if locations_level not in stat.index.names:
                # name the offending trait first, like every other error here
                raise util.CytoflowViewError('locations',
                                             f"{locations_level} is the only level in 'locations', but it's not in 'statistic'")
            
        self._loc_level = locations_level
        unused_locs_levels = set(locs.index.names) - set([locations_level])
        unused_stat_levels = set(stat.index.names) - set([locations_level])
        
        if self.style == "heat":
            if not self.feature:
                raise util.CytoflowViewError('feature',
                                             "For style 'heat', you must set 'feature'!")
            
            if self.feature not in stat.columns:
                raise util.CytoflowViewError('feature',
                                             "Can't find feature '{}' in the statistic columns."
                                             .format(self.feature))

        elif self.variable:
            if self.variable not in stat.index.names:
                raise util.CytoflowViewError('variable',
                                             "Can't find variable '{}' in the statistic index."
                                             .format(self.variable))
        
            if not self.feature:
                raise util.CytoflowViewError('feature',
                                             "For styles 'pie' and 'petal', if you set 'variable', you must set 'feature'!")
            
            if self.feature not in stat.columns:
                raise util.CytoflowViewError('feature',
                                             "Can't find feature '{}' in the statistic columns."
                                             .format(self.feature))
        
            unused_stat_levels -= set([self.variable])
                
        if unused_locs_levels != unused_stat_levels:
            if self.style != "heat" and self.variable:
                raise util.CytoflowViewError('locations',
                                             f"After removing 'variable' {self.variable}, unused levels in 'statistic' are {unused_stat_levels}; unused levels in 'locations' are {unused_locs_levels}. They should be the same!")
            else:
                raise util.CytoflowViewError('locations',
                                             f"Unused levels in 'statistic' are {unused_stat_levels}; unused levels in 'locations' are {unused_locs_levels}. They should be the same!")

        # need to re-index stat to be compatible with locs
        stat_names = locs.index.names + [self.variable] if self.variable else locs.index.names
        new_stat_idx = stat.index.reorder_levels(stat_names)
        stat = stat.copy()
        stat.index = new_stat_idx
        
        # gotta remove rows of stat that aren't in locs
        # (because some clustering operations add a -1 value)
        stat_idx = stat.index.droplevel(self.variable) if self.variable else stat.index
        stat = stat[[v in locs.index for v in stat_idx]]

        if plot_name is not None and not unused_stat_levels:
            raise util.CytoflowViewError('plot_name',
                                         "You specified a plot name, but all "
                                         "the index levels of `locations` are already used")
        
        if unused_stat_levels:
            # materialize the set ONCE so that the statistic, the locations and 
            # the experiment data are all grouped by the same levels in the 
            # same order (and because pandas wants a list, not a set)
            unused_levels = list(unused_stat_levels)
            stat_groupby = stat.groupby(unused_levels, observed = True)
            locs_groupby = locs.groupby(unused_levels, observed = True)

            if plot_name is None:
                raise util.CytoflowViewError('plot_name',
                                             "You must use names {} in the plot name. "
                                             "Possible plot names: {}"
                                             .format(unused_stat_levels, list(stat_groupby.groups.keys())))

            if plot_name not in set(stat_groupby.groups.keys()):
                raise util.CytoflowViewError('plot_name',
                                             "Plot {} is invalid; must  be one of {}"
                                             .format(plot_name, list(stat_groupby.groups.keys())))
                
            group_key = plot_name if util.is_list_like(plot_name) else (plot_name,)
            stat = stat_groupby.get_group(group_key)
            locs = locs_groupby.get_group(group_key)
            if self.size_function:
                experiment_data = experiment.data.groupby(unused_levels, observed = True).get_group(group_key)
        
        title = kwargs.pop("title", None)
        legend = kwargs.pop('legend', True)
        legend_loc = kwargs.pop('legend_loc', 'upper right')
        legendlabel = kwargs.pop('legendlabel', self.variable)
                
        if cytoflow.RUNNING_IN_GUI:
            sns_style = kwargs.pop('sns_style', 'whitegrid')
            sns_context = kwargs.pop('sns_context', 'notebook')
            sns.set_style(sns_style)
            sns.set_context(sns_context)
        else:
            if 'sns_style' in kwargs:
                kwargs.pop('sns_style')
                warn("'sns_style' is ignored when not running in the GUI. Feel free to change the seaborn global settings.",
                     util.CytoflowViewWarning)
                
            if 'sns_context' in kwargs:
                kwargs.pop('sns_context')
                warn("'sns_context' is ignored when not running in the GUI. Feel free to change the seaborn global settings.",
                     util.CytoflowViewWarning)
                
        # the row order of locs is the vertex order of the tree. these two 
        # lookups translate between "value of the location level", "row 
        # position in locs" and "full index key of that row".
        # (if a location value is repeated within locs, the last row wins.)
        flat_locs = locs.index.to_flat_index()
        loc_positions = {value : pos 
                         for pos, value in enumerate(locs.index.get_level_values(locations_level))}
        
        def as_key(key):
            # groupby keys and flat index entries can be scalars or tuples
            # depending on the pandas version and the kind of index; comparing
            # them as tuples makes the two agree.
            return key if isinstance(key, tuple) else (key,)
                
        if self.size_function:
            valid_keys = {as_key(k) for k in flat_locs}
            group_scale = {}
            for group_name, group_data in experiment_data.groupby(by = list(locs.index.names), observed = True):
                key = as_key(group_name)
                if key not in valid_keys:
                    continue
                
                s = self.size_function(group_data)
                try:
                    s = float(s)
                except Exception as e:
                    raise util.CytoflowViewError('size_function',
                                                 'Called size_function on group {} and return type was {}; must return a float!'
                                                 .format(group_name, type(s))) from e
                                                 
                if s < 0.0:
                    raise util.CytoflowViewError('size_function',
                                                 'Called size_function on group {} and return was {}; must be >0!'
                                                 .format(group_name, s))
                group_scale[key] = s
                
            # scale relative to the largest node. without a positive maximum
            # there's nothing to scale by (and we'd divide by zero).
            s_max = max(group_scale.values(), default = 0.0)
            if s_max <= 0.0:
                raise util.CytoflowViewError('size_function',
                                             "size_function did not return a positive value for any location, "
                                             "so the nodes can't be scaled")
                
            group_scale = {k : v / s_max for k, v in group_scale.items()}
        
        # set up the data normalizer
        if 'norm' not in kwargs:
            data_scale = util.scale_factory(scale = self.scale,
                                            experiment = experiment,
                                            statistic = self.statistic,
                                            features = [self.feature] 
                                                       if self.feature 
                                                       else list(stat.columns))
            data_norm = data_scale.norm()
        else:
            data_norm = kwargs.pop('norm')
                    
        # find the locations for the groups
        # MST logic: https://github.com/saeyslab/FlowSOM_Python/blob/d8cb6a5934b1ca9c82f4bba27a1f3cd9862a0596/src/flowsom/main.py#L255
        
        # compute the all-pairs distances
        locs_values = locs[self.locations_features].values if self.locations_features else locs.values
        adjacency_graph = scipy.spatial.distance.cdist(locs_values, locs_values, self.metric)
        
        # create a fully-connected graph
        full_graph = igraph.Graph.Weighted_Adjacency(adjacency_graph, mode = "undirected", loops = False)
        
        # create the spanning tree. (a graph with a single location has no
        # edges, and possibly no "weight" attribute to read.)
        full_weights = full_graph.es["weight"] if full_graph.ecount() else None
        mst_graph = igraph.Graph.spanning_tree(full_graph, weights = full_weights)
        
        # normalize the edge weights (necessary??) -- but only if there is
        # something to normalize by, otherwise the weights become NaN
        edges = mst_graph.get_edgelist()
        if edges:
            mst_weights = np.asarray(mst_graph.es["weight"])
            mean_weight = np.mean(mst_weights)
            if mean_weight > 0:
                mst_graph.es["weight"] = mst_weights / mean_weight
        
        # optimize the layout for plotting
        layout = mst_graph.layout_kamada_kawai(seed = mst_graph.layout_grid(),
                                               maxiter = 50 * mst_graph.vcount(),
                                               kkconst = max([mst_graph.vcount(), 1]))
        layout.fit_into((0.05, 0.05, 0.95, 0.95))

        # make the new figure
        fig = plt.figure()
        ax = fig.add_subplot(111)

        # plot the edges connecting nodes
        segments = [[layout.coords[edge[0]], layout.coords[edge[1]]] for edge in edges]
        segments = mpl.collections.LineCollection(segments, zorder = 1)
        segments.set_edgecolor("black")
        ax.add_collection(segments)
        
        # save the locations for MSTOp
        self._groups = list(stat.groupby(locations_level, observed = True).groups.keys())
        self._vertices = layout.coords
        
        # now, plot the patches
        
        # since the edge length is uniform (i thiiiiink?), set the default radius
        # of the patches to 0.4 times the length of the first edge. a tree with
        # no edges (one location) falls back to an edge length of 1.
        if edges:
            edge_len = scipy.spatial.distance.euclidean(layout.coords[edges[0][0]],
                                                        layout.coords[edges[0][1]])
        else:
            edge_len = 1.0
        
        radius = kwargs.pop('radius', edge_len * 0.4)
        if radius < 0 or radius > 1:
            raise util.CytoflowViewError('radius',
                                         'Radius must be between 0 and 1.')
                
        if self.style == "heat":
            cmap_name = kwargs.pop('palette', 'viridis')
            cmap = sns.color_palette(cmap_name, as_cmap = True)
            if isinstance(cmap, list):
                raise util.CytoflowViewError('palette',
                                             "{} is a qualitative (discrete) palette. Choose a continuous one such as 'rocket', 'mako' or 'viridis'"
                                             .format(cmap_name))
                  
            patches = []
            # look up each row's vertex by its location value instead of
            # assuming the statistic's rows are in the same order as locs'
            stat_locations = stat.index.get_level_values(locations_level)
            for loc_value, x in zip(stat_locations, stat[self.feature]):
                pos = loc_positions[loc_value]
                patch = mpl.patches.Circle(xy = layout.coords[pos], 
                                           radius = radius, 
                                           facecolor = cmap(data_norm(x)),
                                           clip_on = False)
                if self.size_function:
                    patch.set_radius(radius * group_scale[as_key(flat_locs[pos])])
                    
                patch.set(**kwargs)
                    
                patches.append(patch)

            ax.add_collection(mpl.collections.PatchCollection(patches, match_original = True))
                                
            if legend:
                plt.colorbar(mpl.cm.ScalarMappable(norm = data_norm, cmap = cmap),
                             ax = ax,
                             label = legendlabel)

        else:
            # "pie" and "petal" share their categories, colors and legend; 
            # they only differ in the geometry of the wedges.
            if self.variable:
                variable_values = list(stat.index.get_level_values(self.variable).unique())
            else:
                variable_values = list(stat.columns)
                
            palette_name = kwargs.pop('palette', 'deep')
            palette = sns.color_palette(palette_name, n_colors = len(variable_values))
            colors = {var: palette[vi] for vi, var in enumerate(variable_values)}
            legend_artists = {}
            
            def add_wedge(center, r, theta1, theta2, label):
                # draw one slice and remember the first artist seen for each 
                # label so the legend has exactly one entry per category
                w = mpl.patches.Wedge(center = center, 
                                      r = r, 
                                      theta1 = theta1, 
                                      theta2 = theta2, 
                                      facecolor = colors[label],
                                      edgecolor = 'white',
                                      label = label,
                                      clip_on = False)
                w.set(**kwargs)
                ax.add_artist(w)
                if label not in legend_artists:
                    legend_artists[label] = w

            for loc_value, group in stat.groupby(locations_level, observed = True):
                # the group key is a *value* of the location level, not a row
                # number, so translate it before indexing into the layout
                pos = loc_positions[loc_value]
                center = layout.coords[pos]
                
                node_radius = radius
                if self.size_function:
                    node_radius = radius * group_scale[as_key(flat_locs[pos])]
                
                if self.variable:
                    group_data = group[self.feature]
                else:
                    assert(len(group) == 1)
                    group_data = group.iloc[0]
                    
                normed_data = group_data.apply(data_norm)
                
                if self.style == "pie":
                    # arc length is proportional to the normalized value
                    normed_data /= normed_data.sum()
                    
                    theta1 = 0
                    for frac_idx, frac in enumerate(normed_data):
                        theta2 = theta1 + frac
                        
                        if self.variable:
                            label = group.index.get_level_values(self.variable).values[frac_idx]
                        else:
                            label = group_data.index[frac_idx]
                            
                        add_wedge(center, node_radius, 360 * theta1, 360 * theta2, label)
                        theta1 = theta2
                        
                else:
                    # every petal has the same arc; the radius goes with the 
                    # square root of the value so that area tracks intensity
                    normed_data /= normed_data.max()
                    wedge_theta = 360 / len(variable_values)
            
                    theta1 = 0
                    for label in variable_values:
                        theta2 = theta1 + wedge_theta
                        
                        if self.variable:
                            if label in group.index.get_level_values(self.variable):
                                frac = normed_data.xs(level = self.variable, key = label).iloc[0]
                            else:
                                frac = 0
                        else:
                            frac = normed_data.loc[label]
            
                        add_wedge(center, node_radius * math.sqrt(frac), theta1, theta2, label)
                        theta1 = theta2
                            
            if legend:
                legend_artists = {k: legend_artists[k] for k in natsorted(legend_artists.keys())}
                ax.legend(handles = list(legend_artists.values()), 
                          title = legendlabel,
                          loc = legend_loc)
        
        # make axes equal (spacing)
        ax.axis('equal')
        ax.set_axis_off()
        
        if title:
            plt.suptitle(title)

            

[docs]
    def enum_plots(self, experiment):
        """
        Enumerate the named plots we can make from this set of statistics.
        
        Both `statistic` and `locations` are looked up in the experiment and
        subset, then their index levels are checked against each other before
        the remaining ("unused") levels are used to enumerate the plots.
        
        Parameters
        ----------
        experiment : Experiment
            The experiment containing the `statistic` and `locations` 
            statistics.
        
        Returns
        -------
        iterator
            An iterator across the possible plot names. If there are no
            unused index levels, the iterator is empty.
            
        Raises
        ------
        CytoflowViewError
            If ``experiment`` is ``None``; if `statistic` or `locations` can't
            be found; if `locations_level`, `feature` or `variable` are missing
            or don't name a level / column; or if the unused index levels of 
            `statistic` and `locations` differ.
        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment',
                                         "No experiment specified")

        stat = self._get_stat(experiment)
        locs = self._get_locs(experiment)
                
        stat = self._subset_data(self.statistic, stat)
        locs = self._subset_data(self.locations, locs)
        
        # figure out which index level identifies a node in the tree
        if locs.index.nlevels > 1:
            if not self.locations_level:
                raise util.CytoflowViewError('locations_level',
                                             "If 'locations' has more than one level, "
                                             "'locations_level' must be set!")

            if self.locations_level not in locs.index.names:
                raise util.CytoflowViewError('locations_level',
                                             "'locations_level' must be a level in 'locations'")
                
            if self.locations_level not in stat.index.names:
                raise util.CytoflowViewError('locations_level',
                                             "'locations_level' must be a level in 'statistic'")
            
            locations_level = self.locations_level
        else:
            locations_level = locs.index.names[0]
            
            if locations_level not in stat.index.names:
                # like every other error raised here, name the offending 
                # trait first and put the message second
                raise util.CytoflowViewError('locations',
                                             f"{locations_level} is the only level in 'locations', but it's not in 'statistic'")
            
        unused_locs_levels = set(locs.index.names) - {locations_level}
        unused_stat_levels = set(stat.index.names) - {locations_level}
        
        if self.style == "heat":
            if not self.feature:
                raise util.CytoflowViewError('feature',
                                             "For style 'heat', you must set 'feature'!")
            
            if self.feature not in stat.columns:
                raise util.CytoflowViewError('feature',
                                             "Can't find feature '{}' in the statistic columns."
                                             .format(self.feature))

        elif self.variable:
            if self.variable not in stat.index.names:
                raise util.CytoflowViewError('variable',
                                             "Can't find variable '{}' in the statistic index."
                                             .format(self.variable))
        
            if not self.feature:
                raise util.CytoflowViewError('feature',
                                             "For styles 'pie' and 'petal', if you set 'variable', you must set 'feature'!")
            
            if self.feature not in stat.columns:
                raise util.CytoflowViewError('feature',
                                             "Can't find feature '{}' in the statistic columns."
                                             .format(self.feature))
        
            # 'variable' is consumed inside each node, so it is not "unused"
            unused_stat_levels.discard(self.variable)
                
        if unused_locs_levels != unused_stat_levels:
            if self.style != "heat" and self.variable:
                raise util.CytoflowViewError('locations',
                                             f"After removing 'variable' {self.variable}, unused levels in 'statistic' are {unused_stat_levels}; unused levels in 'locations' are {unused_locs_levels}. They should be the same!")
            else:
                raise util.CytoflowViewError('locations',
                                             f"Unused levels in 'statistic' are {unused_stat_levels}; unused levels in 'locations' are {unused_locs_levels}. They should be the same!")

        if unused_stat_levels:
            # keep the levels in the order they appear in the statistic's index
            stat_names = [x for x in stat.index.names if x in unused_stat_levels]
            stat_groupby = stat.groupby(stat_names, observed = True)
            return util.IterByWrapper(iter(stat_groupby.groups), stat_names)
        else:
            return util.IterByWrapper(iter([]), [])    

        
    def _get_stat(self, experiment):
        """
        Look up the statistic named by `statistic` in ``experiment``.
        
        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` mapping is searched.
            
        Returns
        -------
        The entry of ``experiment.statistics`` stored under `statistic`.
        
        Raises
        ------
        CytoflowViewError
            If ``experiment`` is ``None``, if `statistic` is not set, or if
            the experiment has no statistic with that name.
        """
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
        
        stat_name = self.statistic
        if not stat_name:
            raise util.CytoflowViewError('statistic', "Statistic not set")
        
        available = experiment.statistics
        if stat_name not in available:
            raise util.CytoflowViewError('statistic',
                                         "Can't find the statistic {} in the experiment"
                                         .format(stat_name))
            
        return available[stat_name]
    
    def _get_locs(self, experiment):
        """
        Look up the statistic named by `locations` in ``experiment``.
        
        Parameters
        ----------
        experiment : Experiment
            The experiment whose ``statistics`` mapping is searched.
            
        Returns
        -------
        The entry of ``experiment.statistics`` stored under `locations`.
        
        Raises
        ------
        CytoflowViewError
            If ``experiment`` is ``None``, if `locations` is not set, or if
            the experiment has no statistic with that name.
        """
        
        if experiment is None:
            raise util.CytoflowViewError('experiment', "No experiment specified")
        
        # the errors below are tagged with 'locations' -- the name of the 
        # attribute they are about -- rather than the misspelled 'location'
        if not self.locations:
            raise util.CytoflowViewError('locations', "Location not set")
        
        if self.locations not in experiment.statistics:
            raise util.CytoflowViewError('locations',
                                         "Can't find the location statistic {} in the experiment"
                                         .format(self.locations))
            
        locs = experiment.statistics[self.locations]

        return locs
    
    def _subset_data(self, stat_name, data):
        """
        Apply `subset` to a statistic and drop index levels that only have
        one value.
        
        The statistic passed in is never modified; a new object is returned.
        
        Parameters
        ----------
        stat_name : str
            The name of the statistic, used only in warnings and error 
            messages.
            
        data : pandas.DataFrame
            The statistic to subset.
            
        Returns
        -------
        pandas.DataFrame
            The subset statistic. Its index is always a `pandas.MultiIndex`,
            even if only one level remains.
            
        Raises
        ------
        CytoflowViewError
            If `subset` can't be evaluated against ``data``, if it selects no
            rows, or if the only remaining index level has a single value.
        """
        
        if self.subset:
            try:
                # TODO - either sanitize column names, or check to see that
                # all conditions are valid Python variables
                data = data.query(self.subset)
            except Exception as e:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{}' isn't valid for '{}'"
                                             .format(self.subset, stat_name)) from e
                
            if len(data) == 0:
                raise util.CytoflowViewError('subset',
                                             "Subset string '{}' returned no values from '{}'"
                                             .format(self.subset, stat_name))
                
        # the index is re-assigned below. without a subset, 'data' is still
        # the caller's object, so work on a shallow copy to leave the caller's
        # index alone.
        data = data.copy(deep = False)
                
        names = list(data.index.names)
        
        for name in names:
            unique_values = data.index.get_level_values(name).unique()
            if len(unique_values) == 1:
                warn("Only one value for level {} in {}; dropping it.".format(name, stat_name),
                     util.CytoflowViewWarning)
                try:
                    data.index = data.index.droplevel(name)
                except (AttributeError, ValueError) as e:
                    # pandas raises ValueError when asked to remove the last
                    # remaining level of an index
                    raise util.CytoflowViewError(None,
                                                 "Must have more than one "
                                                 "value to plot.") from e
                                                 
        # droplevel makes this a plain Index instead of a MultiIndex. no bueno.
        if not isinstance(data.index, pd.MultiIndex):
            data.index = pd.MultiIndex.from_tuples([(x,) for x in data.index.to_list()], 
                                                   names = [data.index.name])

        return data

        
    
# def pie(x, loc, **kwargs):