Source code for cytoflow.operations.binning

#!/usr/bin/env python3.8
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflow.operations.binning
---------------------------

`binning` has two classes:

`BinningOp` -- divides events in a channel into bins of equal 
width (after applying an optional scale)

`BinningView` -- a default view to display the bins.
"""

from traits.api import (HasStrictTraits, Str, provides, Constant, Int)
import numpy as np

from cytoflow.views import IView, HistogramView
import cytoflow.utility as util

from .i_operation import IOperation
from .base_op_views import Op1DView, AnnotatingView

@provides(IOperation)
class BinningOp(HasStrictTraits):
    """
    Bin data along an axis.
    
    This operation creates equally spaced bins (in linear or log space)
    along an axis and adds a condition assigning each event to a bin.  The
    value of the event's condition is the left end of the bin's interval in
    which the event is located.
    
    Attributes
    ----------
    name : Str
        The operation name.  Used to name the new metadata field in the
        experiment that's created by apply()
        
    bin_count_name : Str
        Optional.  If set, `apply` adds a second condition with this name 
        whose value, for each event, is the number of events counted in 
        that event's bin.
        
    channel : Str
        The name of the channel along which to bin.

    scale : {"linear", "log", "logicle"}
        Make the bins equidistant along what scale?
        
    bin_width : Float
        The width of the bins. If `scale` is ``log``, `bin_width` is in 
        log-10 units; if `scale` is ``logicle``, an error is thrown because the
        units are ill-defined.
 
    
    Examples
    --------
    Create a small experiment:
    
    .. plot::
        :context: close-figs
    
        >>> import cytoflow as flow
        >>> import_op = flow.ImportOp()
        >>> import_op.tubes = [flow.Tube(file = "tasbe/rby.fcs")]
        >>> ex = import_op.apply()
    
    Create and parameterize the operation
    
    .. plot::
        :context: close-figs

        >>> bin_op = flow.BinningOp()
        >>> bin_op.name = "Bin"
        >>> bin_op.channel = "FITC-A"
        >>> bin_op.scale = "log"
        >>> bin_op.bin_width = 0.2
    
    Apply the operation to the experiment
    
    .. plot::
        :context: close-figs 
    
        >>> ex2 = bin_op.apply(ex)
    
    Plot the result
    
    .. plot::
        :context: close-figs

        >>> bin_op.default_view().plot(ex2)  

    """
    
    # traits
    id = Constant('edu.mit.synbio.cytoflow.operations.binning')
    friendly_id = Constant("Binning")
    
    name = Str
    bin_count_name = Str
    channel = Str
    bin_width = util.PositiveFloat(None, allow_zero = False, allow_none = True)
    scale = util.ScaleEnum
    
    # upper limit on the number of bin edges apply() will generate
    _max_num_bins = Int(100)

    def apply(self, experiment):
        """
        Applies the binning to an experiment.
        
        Parameters
        ----------
        experiment : `Experiment`
            the old experiment to which this op is applied
            
        Returns
        -------
        `Experiment`
            A new experiment with a condition column named `name`, which 
            contains the location of the left-most edge of the bin that the 
            event is in.  If `bin_count_name` is set, the new experiment also
            has a condition column with that name holding the per-bin event
            count.

        Raises
        ------
        `CytoflowOpError`
            If the experiment is missing; if `name`, `channel` or `bin_width`
            is unset or invalid; if `name` or `bin_count_name` is already a 
            column in the experiment; if `scale` is neither ``linear`` nor 
            ``log``; or if the parameters produce more than `_max_num_bins` 
            or fewer than two bin edges.
        """
        if experiment is None:
            raise util.CytoflowOpError('experiment', "no experiment specified")
        
        if not self.name:
            raise util.CytoflowOpError('name', "Name is not set")
        
        if self.name != util.sanitize_identifier(self.name):
            raise util.CytoflowOpError('name',
                                       "Name can only contain letters, numbers and underscores.")
        
        if self.name in experiment.data.columns:
            raise util.CytoflowOpError('name',
                                       "Name {} is in the experiment already"
                                       .format(self.name))
            
        if self.bin_count_name and self.bin_count_name in experiment.data.columns:
            raise util.CytoflowOpError('bin_count_name',
                                       "bin_count_name {} is in the experiment already"
                                       .format(self.bin_count_name))
        
        if not self.channel:
            raise util.CytoflowOpError('channel', "channel is not set")
        
        if self.channel not in experiment.data.columns:
            raise util.CytoflowOpError('channel',
                                       "channel {} isn't in the experiment"
                                       .format(self.channel))
              
        if self.bin_width is None:
            raise util.CytoflowOpError('bin_width',
                                       "must set bin width")
        
        if not (self.scale == "linear" or self.scale == "log"):
            raise util.CytoflowOpError('scale',
                                       "Can only use binning op with linear or log scale") 
        
        scale = util.scale_factory(self.scale, experiment, channel = self.channel)
        
        # clip once and reuse the result for both extremes
        clipped = scale.clip(experiment.data[self.channel])
        scaled_min = scale(clipped.min())
        scaled_max = scale(clipped.max())
        
        # the bin edges are anchored at `start` (in scaled units) so that
        # they don't depend on where the data happens to begin
        if self.scale == 'linear':
            start = 0
        else:
            start = 1
        
        # edges at and below `start`: count "up" in negated space until we 
        # pass the scaled minimum, flip the sign back, reverse into ascending 
        # order and drop the trailing copy of `start` (the right-hand range 
        # supplies it)
        scaled_bins_left = np.arange(start = -1.0 * start, 
                                     stop = (-1.0 * scaled_min) + self.bin_width, 
                                     step = self.bin_width) * -1.0
        scaled_bins_left = scaled_bins_left[::-1][:-1]
        
        # edges from `start` upward, extending past the scaled maximum
        scaled_bins_right = np.arange(start = start,
                                      stop = scaled_max + self.bin_width,
                                      step = self.bin_width)
        scaled_bins = np.append(scaled_bins_left, scaled_bins_right)
        
        if len(scaled_bins) > self._max_num_bins:
            raise util.CytoflowOpError(None,
                                       "Too many bins! To increase this limit, "
                                       "change _max_num_bins (currently {})"
                                       .format(self._max_num_bins))
            
        if len(scaled_bins) < 2:
            raise util.CytoflowOpError('bin_width',
                                       "Must have more than one bin")
        
        # now, back into data space
        bins = scale.inverse(scaled_bins)
        
        # reduce to 4 sig figs
        bins = ['%.4g' % x for x in bins]
        bins = [float(x) for x in bins]
        bins = np.array(bins)
        
        # put the data in bins.  only the interior edges are used as 
        # thresholds, so the resulting indices always point at a valid left 
        # edge: events below the second edge land in the first bin and 
        # events at or above the next-to-last edge land in the last one.
        bin_idx = np.digitize(experiment.data[self.channel], bins[1:-1])
        
        new_experiment = experiment.clone(deep = False)
        new_experiment.add_condition(self.name, "float64", bins[bin_idx])
        
        # keep track of the bins we used, for prettier plotting later.
        new_experiment.metadata[self.name]["bin_scale"] = self.scale
        new_experiment.metadata[self.name]["bins"] = bins
        
        if self.bin_count_name:
            # only one column's per-bin count is ever used, so select that 
            # column *before* counting rather than counting every column of 
            # the data frame and throwing the rest away.  the column is the 
            # first one that isn't the grouping key, which is the same column 
            # that ``groupby(...).count().columns[0]`` refers to.
            # TODO - turn this on by default
            data = new_experiment.data
            count_column = next(c for c in data.columns if c != self.name)
            agg_count = data.groupby(self.name)[count_column].count()
            
            # have to make the condition a float64, because if we're in log
            # space there may be events that have NaN as the bin number.
            new_experiment.add_condition(
                self.bin_count_name,
                "float64",
                new_experiment[self.name].map(agg_count))
        
        new_experiment.history.append(self.clone_traits(transient = lambda _: True))
        return new_experiment

    def default_view(self, **kwargs):
        """
        Returns a diagnostic plot to check the binning.
        
        Parameters
        ----------
        **kwargs
            Trait values to set on the new view (passed to its `trait_set`).
        
        Returns
        -------
        `IView`
            A view instance, call `plot()` to plot the bins.
        """
        v = BinningView(op = self)
        v.trait_set(**kwargs)
        return v
@provides(IView)
class BinningView(Op1DView, AnnotatingView, HistogramView):
    """
    Plots a histogram of the current binning op.  By default, the different
    bins are shown in different colors.
    
    Attributes
    ----------
    
    """
    
    # NOTE: the "Attributes" section above and the "Parameters" section in
    # plot() are intentionally left empty; this class and plot() are passed 
    # to util.expand_class_attributes / util.expand_method_parameters right 
    # after the class body, which presumably fill those sections in.
    
    id = Constant('edu.mit.synbio.cytoflow.views.binning')
    friendly_id = Constant('Binning Setup')
    
    def plot(self, experiment, **kwargs):
        """
        Plot the histogram.
        
        Parameters
        ----------
        
        """
        
        # hue follows the scale that the operation binned with
        self.huescale = self.op.scale
        
        # _strip_trait hands back the view object to plot with, plus the
        # trait name to forward as ``annotation_trait``
        stripped_view, annotation_trait = self._strip_trait(self.op.name)
        
        # dispatch to the next plot() after BinningView in the MRO, bound 
        # to the stripped view rather than to self
        parent_plot = super(BinningView, stripped_view).plot
        parent_plot(experiment,
                    annotation_facet = self.op.name,
                    annotation_trait = annotation_trait,
                    annotations = {},
                    **kwargs)


# post-process BinningView's class docstring and the docstring of its plot()
util.expand_class_attributes(BinningView)
util.expand_method_parameters(BinningView, BinningView.plot)