Source code for cytoflowgui.workflow.operations.xform_stat

#!/usr/bin/env python3.8

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflowgui.workflow.operations.xform_stat
------------------------------------------

"""

import numpy as np
import scipy.stats
import pandas
from warnings import warn

from traits.api import (Str, Callable, Property, List, provides, observe, 
                        Undefined, Bool)  # @UnresolvedImport

import cytoflow.utility as util
from cytoflow import TransformStatisticOp
                       
from cytoflowgui.workflow.serialization import camel_registry, cytoflow_class_repr
from .operation_base import IWorkflowOperation, WorkflowOperation

from ..subset import ISubset

# Install the GUI's repr implementation on the underlying cytoflow operation.
# get_notebook_code() in this module calls repr() on a TransformStatisticOp
# to produce the constructor text it writes into the exported code.
TransformStatisticOp.__repr__ = cytoflow_class_repr

# Helpers that call util.ci with np.mean / util.geom_mean and boots = 100.
# NOTE(review): the names suggest a 95% confidence interval and "boots" is
# presumably the number of bootstrap resamples -- confirm against util.ci.
# Neither helper is referenced by the transform_functions table in this module.
mean_95ci = lambda x: util.ci(x, np.mean, boots = 100)
geomean_95ci = lambda x: util.ci(x, util.geom_mean, boots = 100)

# Lookup table from a transform's display name to the callable implementing it.
# The workflow operation stores only the name (``function_name``) and resolves
# it through this table when it is applied or exported as notebook code.
transform_functions = {
    # plain library / utility callables
    "Mean": np.mean,
    "Median": np.median,
    "Geom.Mean": util.geom_mean,
    "Count": len,
    "Std.Dev": np.std,
    "Geom.Std.Dev": util.geom_sd,
    "SEM": scipy.stats.sem,
    "Sum": np.sum,

    # callables returning a pandas.Series: every value of the input divided by
    # the input's sum (optionally scaled to percent) or by the input's minimum
    "Proportion": lambda a: pandas.Series(a / a.sum()),
    "Percentage": lambda a: pandas.Series(a / a.sum()) * 100.0,
    "Fold": lambda a: pandas.Series(a / a.min()),
}


@provides(IWorkflowOperation)
class TransformStatisticWorkflowOp(WorkflowOperation, TransformStatisticOp):
    """
    Workflow (GUI-side) wrapper around `TransformStatisticOp`.

    The transform is chosen by name: ``function_name`` must be a key of the
    module-level ``transform_functions`` table and is resolved to the actual
    callable in `apply` and `get_notebook_code`.  The ``subset`` expression is
    not stored; it is computed on demand from ``subset_list``.
    """

    # NOTE(review): ``apply = True`` is trait metadata; presumably it marks the
    # traits whose changes require the operation to be re-applied -- confirm
    # against WorkflowOperation.
    name = Str(apply = True)
    statistic = Str(apply = True)
    feature = Str(apply = True)
    function_name = Str(apply = True)
    by = List(Str, apply = True)
    ignore_incomplete_groups = Bool(True, apply = True)

    # override the base class's "subset" with one that is dynamically generated /
    # updated from subset_list
    subset = Property(Str, observe = "subset_list.items.str")
    subset_list = List(ISubset, apply = True)

    # functions aren't picklable, so send the name instead
    function = Callable(transient = True)

    # fixed at 0 here; get_notebook_code() deliberately skips 'fill' when it
    # copies traits onto the exported operation
    fill = 0

    # bits to support the subset editor
    @observe('subset_list:items.str')
    def _on_subset_changed(self, _):
        """Flag ``subset_list`` as changed when any subset's ``str`` changes."""
        self.changed = 'subset_list'

    # MAGIC - returns the value of the "subset" Property, above
    def _get_subset(self):
        """Join the non-empty ``str`` of every subset with ``" and "``."""
        return " and ".join([subset.str for subset in self.subset_list if subset.str])

    def apply(self, experiment):
        """
        Resolve ``function_name`` to a callable and run the base-class apply.

        Parameters
        ----------
        experiment
            Passed through unchanged to `TransformStatisticOp.apply`.

        Returns
        -------
        The object returned by `TransformStatisticOp.apply`.

        Raises
        ------
        util.CytoflowOpError
            If ``function_name`` is empty.
        KeyError
            If ``function_name`` is not a key of ``transform_functions``.
        """
        if not self.function_name:
            raise util.CytoflowOpError("Transform function not set")

        self.function = transform_functions[self.function_name]

        ret = TransformStatisticOp.apply(self, experiment)

        stat = ret.statistics[self.name]

        # warn, rather than fail, so the result is still handed back
        if Undefined in stat:
            warn("One of the transformed values was Undefined. "
                 "Subsequent operations may fail. "
                 "Please report this as a bug! ")

        return ret

    def clear_estimate(self):
        """Do nothing; this method is a no-op for this operation."""
        # no-op
        return

    def get_notebook_code(self, idx):
        """
        Return the source text that re-creates and applies this operation.

        Parameters
        ----------
        idx : int
            Position used to name the generated variables: the text defines
            ``op_{idx}`` and ``ex_{idx}`` and reads ``ex_{idx - 1}``.

        Returns
        -------
        str
            An optional import line for the transform function, the
            ``op_{idx} = <repr>`` assignment and the ``apply`` call.

        Raises
        ------
        KeyError
            If ``function_name`` is not a key of ``transform_functions``.
        """
        op = TransformStatisticOp()
        op.copy_traits(self, [x for x in op.copyable_trait_names() if x != 'fill'])

        # import line needed by the text in fn_repr; None means "no import"
        fn_import = {"Mean" : "import numpy as np",
                     "Median" : "import numpy as np",
                     "Std.Dev" : "from numpy import std",
                     "Geom.Std.Dev" : None,
                     "Geom.SD" : None,
                     "SEM" : "from scipy.stats import sem",
                     "Geom.SEM" : None,
                     "Mean 95% CI" : None,
                     "Geom.Mean 95% CI" : None,
                     "Sum" : "from numpy import sum",
                     "Proportion" : "from pandas import Series",
                     "Percentage" : "from pandas import Series",
                     "Fold" : "from pandas import Series"
                     }

        # how each transform function should be spelled in the generated code
        fn_repr = {"Mean" : "np.mean",
                   "Median" : "np.median",
                   "Geom.Mean" : "geom_mean",
                   "Count" : "len",
                   "Std.Dev" : "std",
                   "Geom.Std.Dev" : "geom_sd",
                   "SEM" : "sem",
                   "Sum" : "sum",
                   "Proportion" : "lambda a: Series(a / a.sum())",
                   "Percentage" : "lambda a: Series(a / a.sum()) * 100.0",
                   "Fold" : "lambda a: Series(a / a.min())"
                   }

        op.function = transform_functions[self.function_name]

        # NOTE: this renames the shared function object stored in
        # transform_functions, not a copy of it.
        # NOTE(review): presumably repr(op) prints the function via __name__
        # -- confirm against cytoflow_class_repr.
        try:
            op.function.__name__ = fn_repr[self.function_name]
        except AttributeError:
            # can't reassign the name of "len", for example
            pass

        # Entries mapped to None (and names missing from fn_import) need no
        # import line.  Fall back to "" so that a literal "None" is never
        # written into the generated code.
        import_statement = fn_import.get(self.function_name) or ""

        return "\n{import_statement}\nop_{idx} = {repr}\n\nex_{idx} = op_{idx}.apply(ex_{prev_idx})" \
            .format(import_statement = import_statement,
                    repr = repr(op),
                    idx = idx,
                    prev_idx = idx - 1)
### Serialization

@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 4)
def _dump_v4(op):
    """Dump *op* in the version 4 layout (includes ``ignore_incomplete_groups``)."""
    return {
        "name": op.name,
        "statistic": op.statistic,
        "feature": op.feature,
        "function_name": op.function_name,
        "ignore_incomplete_groups": op.ignore_incomplete_groups,
        "by": op.by,
        "subset_list": op.subset_list,
    }


@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 3)
def _dump_v3(op):
    """Dump *op* in the version 3 layout (no ``ignore_incomplete_groups``)."""
    return {
        "name": op.name,
        "statistic": op.statistic,
        "feature": op.feature,
        "function_name": op.function_name,
        "by": op.by,
        "subset_list": op.subset_list,
    }


@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 2)
def _dump_v2(op):
    """Dump *op* in the version 2 layout (no ``feature``)."""
    return {
        "name": op.name,
        "statistic": op.statistic,
        "function_name": op.function_name,
        "by": op.by,
        "subset_list": op.subset_list,
    }


@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 1)
def _dump_v1(op):
    """Dump *op* in the version 1 layout, which stores ``function_name`` under ``statistic_name``."""
    return {
        "name": op.name,
        "statistic": op.statistic,
        "statistic_name": op.function_name,
        "by": op.by,
        "subset_list": op.subset_list,
    }


@camel_registry.loader('transform-statistic', version = any)
def _load(data, version):
    """Build the operation directly from the stored keyword arguments."""
    return TransformStatisticWorkflowOp(**data)


@camel_registry.loader('transform-statistic', version = 1)
def _load_v1(data, version):
    """
    Build the operation from a version 1 record.

    Only the first element of the stored ``statistic`` is kept and the
    ``statistic_name`` entry is discarded; *data* is modified in place.
    """
    first_element = tuple(data['statistic'])[0]
    data['statistic'] = first_element
    del data['statistic_name']

    # TODO - some warning about how stats have changed.
    return TransformStatisticWorkflowOp(**data)