# Source code for cytoflowgui.workflow.operations.xform_stat
#!/usr/bin/env python3.8
# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
cytoflowgui.workflow.operations.xform_stat
------------------------------------------
"""
import numpy as np
import scipy.stats
import pandas
from warnings import warn
from traits.api import (Str, Callable, Property, List, provides, observe,
Undefined, Bool) # @UnresolvedImport
import cytoflow.utility as util
from cytoflow import TransformStatisticOp
from cytoflowgui.workflow.serialization import camel_registry, cytoflow_class_repr
from .operation_base import IWorkflowOperation, WorkflowOperation
from ..subset import ISubset
# Replace __repr__ on the underlying TransformStatisticOp class (not just on
# the workflow subclass defined in this module) with cytoflow_class_repr.
# get_notebook_code() embeds repr(op) of a plain TransformStatisticOp in the
# code it generates.
TransformStatisticOp.__repr__ = cytoflow_class_repr
# Helpers that call util.ci with a fixed estimator and boots = 100.
# NOTE(review): neither helper is referenced by transform_functions or by
# anything else visible in this file -- confirm whether they are still needed.
mean_95ci = lambda x: util.ci(x, np.mean, boots = 100)
geomean_95ci = lambda x: util.ci(x, util.geom_mean, boots = 100)
# Maps the function name stored in TransformStatisticWorkflowOp.function_name
# to the callable that apply() installs as the operation's `function`.
# The keys are also used as lookup keys by get_notebook_code(), so any entry
# added here needs a matching entry in its fn_repr table.
transform_functions = {"Mean" : np.mean,
"Median" : np.median,
"Geom.Mean" : util.geom_mean,
"Count" : len,
"Std.Dev" : np.std,
"Geom.Std.Dev" : util.geom_sd,
"SEM" : scipy.stats.sem,
"Sum" : np.sum,
# The three entries below wrap their result in a pandas.Series instead of
# reducing to a single value: each input element is divided by the sum
# (or the minimum) of the input.
# presumably `a` is a pandas Series / numpy array -- TODO confirm
"Proportion" : lambda a: pandas.Series(a / a.sum()),
"Percentage" : lambda a: pandas.Series(a / a.sum()) * 100.0,
"Fold" : lambda a: pandas.Series(a / a.min())
}
# [docs]  (Sphinx viewcode link marker; not part of the module)
@provides(IWorkflowOperation)
class TransformStatisticWorkflowOp(WorkflowOperation, TransformStatisticOp):
    """
    Workflow wrapper around `TransformStatisticOp`.

    The transformation is selected by name (`function_name`) rather than by
    callable; `apply` resolves the name through the module-level
    ``transform_functions`` table just before delegating to
    `TransformStatisticOp.apply`.  The ``subset`` expression is derived from
    ``subset_list`` instead of being stored directly.
    """

    name = Str(apply = True)
    statistic = Str(apply = True)
    feature = Str(apply = True)
    function_name = Str(apply = True)
    by = List(Str, apply = True)
    ignore_incomplete_groups = Bool(True, apply = True)

    # override the base class's "subset" with one that is dynamically generated /
    # updated from subset_list
    subset = Property(Str, observe = "subset_list.items.str")
    subset_list = List(ISubset, apply = True)

    # functions aren't picklable, so send the name instead
    function = Callable(transient = True)

    fill = 0

    # bits to support the subset editor
    @observe('subset_list:items.str')
    def _on_subset_changed(self, _):
        """Flag ``subset_list`` as changed whenever one of its items' ``str`` changes."""
        self.changed = 'subset_list'

    # MAGIC - returns the value of the "subset" Property, above
    def _get_subset(self):
        """Join the non-empty ``str`` of every entry in ``subset_list`` with " and "."""
        return " and ".join([subset.str for subset in self.subset_list if subset.str])

    def apply(self, experiment):
        """
        Resolve `function_name` to a callable and apply the transformation.

        Parameters
        ----------
        experiment
            Passed through unchanged to `TransformStatisticOp.apply`.

        Returns
        -------
        Whatever `TransformStatisticOp.apply` returns.

        Raises
        ------
        util.CytoflowOpError
            If `function_name` is empty or is not a key of
            ``transform_functions``.
        """
        if not self.function_name:
            raise util.CytoflowOpError("Transform function not set")

        # An unrecognised name is reported as an operation error, like the
        # "not set" case above, instead of leaking a bare KeyError.
        try:
            self.function = transform_functions[self.function_name]
        except KeyError:
            raise util.CytoflowOpError("Unknown transform function {!r}"
                                       .format(self.function_name)) from None

        ret = TransformStatisticOp.apply(self, experiment)

        stat = ret.statistics[self.name]
        # NOTE(review): `in` on a pandas object tests its labels rather than
        # its values -- confirm this check does what is intended.
        if Undefined in stat:
            warn("One of the transformed values was Undefined. "
                 "Subsequent operations may fail. "
                 "Please report this as a bug! ")

        return ret

    def get_notebook_code(self, idx):
        """
        Return source text that re-creates and applies this operation.

        Parameters
        ----------
        idx
            Integer position of this operation; used to name the generated
            variables ``op_{idx}`` and ``ex_{idx}``, and to reference the
            input experiment ``ex_{idx - 1}``.

        Returns
        -------
        str
            An optional import statement, the assignment of ``repr(op)`` to
            ``op_{idx}``, and the ``apply`` call.
        """
        op = TransformStatisticOp()
        op.copy_traits(self, [x for x in op.copyable_trait_names() if x != 'fill'])

        # Import line needed by the generated code, keyed by function name.
        # A value of None means "no import line".
        fn_import = {"Mean" : "import numpy as np",
                     "Median" : "import numpy as np",
                     "Std.Dev" : "from numpy import std",
                     "Geom.Std.Dev" : None,
                     "Geom.SD" : None,
                     "SEM" : "from scipy.stats import sem",
                     "Geom.SEM" : None,
                     "Mean 95% CI" : None,
                     "Geom.Mean 95% CI" : None,
                     "Sum" : "from numpy import sum",
                     "Proportion" : "from pandas import Series",
                     "Percentage" : "from pandas import Series",
                     "Fold" : "from pandas import Series"
                     }

        # Text assigned to the function's __name__ below, keyed by function name.
        fn_repr = {"Mean" : "np.mean",
                   "Median" : "np.median",
                   "Geom.Mean" : "geom_mean",
                   "Count" : "len",
                   "Std.Dev" : "std",
                   "Geom.Std.Dev" : "geom_sd",
                   "SEM" : "sem",
                   "Sum" : "sum",
                   "Proportion" : "lambda a: Series(a / a.sum())",
                   "Percentage" : "lambda a: Series(a / a.sum()) * 100.0",
                   "Fold" : "lambda a: Series(a / a.min())"
                   }

        op.function = transform_functions[self.function_name]

        # NOTE(review): this renames the shared function object stored in
        # transform_functions, not a copy of it.
        try:
            op.function.__name__ = fn_repr[self.function_name]
        except AttributeError:
            # can't reassign the name of "len", for example
            pass

        # A missing key and an explicit None both mean "no import line";
        # without the `or`, a None entry would be formatted as the text "None".
        import_statement = fn_import.get(self.function_name) or ""

        return "\n{import_statement}\nop_{idx} = {repr}\n\nex_{idx} = op_{idx}.apply(ex_{prev_idx})" \
            .format(import_statement = import_statement,
                    repr = repr(op),
                    idx = idx,
                    prev_idx = idx - 1)
### Serialization
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 4)
def _dump_v4(op):
    """Build the version-4 'transform-statistic' mapping from *op*'s traits."""
    saved_traits = ("name",
                    "statistic",
                    "feature",
                    "function_name",
                    "ignore_incomplete_groups",
                    "by",
                    "subset_list")
    # every key is stored under the same name as the trait it is read from
    return {trait: getattr(op, trait) for trait in saved_traits}
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 3)
def _dump_v3(op):
    """Build the version-3 'transform-statistic' mapping from *op*'s traits."""
    saved_traits = ("name",
                    "statistic",
                    "feature",
                    "function_name",
                    "by",
                    "subset_list")
    # every key is stored under the same name as the trait it is read from
    return {trait: getattr(op, trait) for trait in saved_traits}
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 2)
def _dump_v2(op):
    """Build the version-2 'transform-statistic' mapping from *op*'s traits."""
    saved_traits = ("name",
                    "statistic",
                    "function_name",
                    "by",
                    "subset_list")
    # every key is stored under the same name as the trait it is read from
    return {trait: getattr(op, trait) for trait in saved_traits}
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 1)
def _dump_v1(op):
    """
    Build the version-1 'transform-statistic' mapping from *op*'s traits.

    Unlike the later versions, the function name is written under the key
    ``statistic_name``.
    """
    return {"name": op.name,
            "statistic": op.statistic,
            "statistic_name": op.function_name,
            "by": op.by,
            "subset_list": op.subset_list}
@camel_registry.loader('transform-statistic', version = any)
def _load(data, version):
    """
    Rebuild a `TransformStatisticWorkflowOp` from a saved mapping.

    Every entry of *data* is passed to the constructor as a keyword
    argument; *version* is not consulted.
    """
    restored = TransformStatisticWorkflowOp(**data)
    return restored
@camel_registry.loader('transform-statistic', version = 1)
def _load_v1(data, version):
    """
    Rebuild a `TransformStatisticWorkflowOp` from a version-1 mapping.

    *data* is modified in place: ``statistic`` is replaced by its first
    element and ``statistic_name`` is removed before the remaining entries
    are passed to the constructor as keyword arguments.
    """
    # presumably version-1 files stored `statistic` as a sequence -- TODO confirm.
    # NOTE(review): if a plain string arrives here this keeps only its first
    # character.
    old_statistic = tuple(data['statistic'])
    data['statistic'] = old_statistic[0]

    # NOTE(review): the value is discarded rather than moved to
    # 'function_name' -- confirm that is intended.
    del data['statistic_name']

    # TODO - some warning about how stats have changed.
    restored = TransformStatisticWorkflowOp(**data)
    return restored