# Source code for cytoflowgui.workflow.operations.xform_stat
#!/usr/bin/env python3.8
# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
cytoflowgui.workflow.operations.xform_stat
------------------------------------------
"""
import numpy as np
import scipy.stats
import pandas
from warnings import warn
from traits.api import (Str, Callable, Property, List, provides, observe, Tuple, Undefined)
import cytoflow.utility as util
from cytoflow import TransformStatisticOp
from cytoflowgui.workflow.serialization import camel_registry, traits_repr
from .operation_base import IWorkflowOperation, WorkflowOperation
from ..subset import ISubset
# Replace TransformStatisticOp's __repr__ with traits_repr from the GUI's
# serialization module. get_notebook_code() below formats repr(op) of a
# TransformStatisticOp straight into the generated notebook code.
TransformStatisticOp.__repr__ = traits_repr
# Wrappers around util.ci() that use np.mean / util.geom_mean as the statistic
# and pass boots = 100.
# NOTE(review): the "95ci" in the names suggests util.ci defaults to a 95%
# interval -- confirm against util.ci.
mean_95ci = lambda x: util.ci(x, np.mean, boots = 100)
geomean_95ci = lambda x: util.ci(x, util.geom_mean, boots = 100)
# Maps a transform's display name (the value stored in the operation's
# ``statistic_name`` trait) to the callable that implements it.
# TransformStatisticWorkflowOp looks the callable up here in both apply() and
# get_notebook_code(); the fn_import / fn_name tables in get_notebook_code()
# use the same keys, so the three must be kept in sync.
# NOTE: get_notebook_code() assigns to ``__name__`` on these function objects,
# so the objects stored here are mutated at runtime.
transform_functions = {"Mean" : np.mean,
"Geom.Mean" : util.geom_mean,
"Median" : np.median,
"Count" : len,
"Std.Dev" : np.std,
"Geom.SD" : util.geom_sd_range,
"SEM" : scipy.stats.sem,
"Geom.SEM" : util.geom_sem_range,
"Mean 95% CI" : mean_95ci,
"Geom.Mean 95% CI" : geomean_95ci,
"Sum" : np.sum,
# The next three divide the input by one of its own aggregates (sum or min)
# and wrap the result in a pandas.Series.
# NOTE(review): a zero sum / zero minimum is not guarded against here.
"Proportion" : lambda a: pandas.Series(a / a.sum()),
"Percentage" : lambda a: pandas.Series(a / a.sum()) * 100.0,
"Fold" : lambda a: pandas.Series(a / a.min())
}
@provides(IWorkflowOperation)
class TransformStatisticWorkflowOp(WorkflowOperation, TransformStatisticOp):
    """
    Workflow wrapper around `TransformStatisticOp`.

    Instead of holding a callable directly, the transform is chosen by name
    through `statistic_name`, which must be one of the keys of the
    module-level ``transform_functions`` table.  The callable itself is
    looked up from that table whenever it is needed (`apply` and
    `get_notebook_code`).
    """

    name = Str(apply = True)
    statistic = Tuple(Str, Str, apply = True)
    statistic_name = Str(apply = True)
    by = List(Str, apply = True)

    # override the base class's "subset" with one that is dynamically generated /
    # updated from subset_list
    subset = Property(Str, observe = "subset_list.items.str")
    subset_list = List(ISubset, apply = True)

    # functions aren't picklable, so send the name instead
    function = Callable(transient = True)

    # No explicit fill value is chosen here: the attribute is left as Undefined.
    # (A per-statistic fill lookup used to live here but was disabled.)
    fill = Undefined

    # bits to support the subset editor
    @observe('subset_list:items.str')
    def _on_subset_changed(self, _):
        """Flag 'subset_list' as changed whenever one of its items' ``str`` changes."""
        self.changed = 'subset_list'

    # MAGIC - returns the value of the "subset" Property, above
    def _get_subset(self):
        """Join the non-empty ``str`` of every entry in `subset_list` with " and "."""
        return " and ".join([subset.str for subset in self.subset_list if subset.str])

    def _lookup_function(self):
        """
        Return the callable registered for `statistic_name`.

        Raises
        ------
        util.CytoflowOpError
            If `statistic_name` is empty, or is not a key of
            ``transform_functions``.
        """
        if not self.statistic_name:
            raise util.CytoflowOpError("Transform function not set")

        try:
            return transform_functions[self.statistic_name]
        except KeyError:
            # report an unknown name the same way as a missing one, instead of
            # leaking a bare KeyError to the caller
            raise util.CytoflowOpError("Unknown transform function '{}'"
                                       .format(self.statistic_name)) from None

    def apply(self, experiment):
        """
        Look up the transform named by `statistic_name`, then delegate to
        `TransformStatisticOp.apply`.

        Parameters
        ----------
        experiment
            Passed through unchanged to `TransformStatisticOp.apply`.

        Returns
        -------
        The value returned by `TransformStatisticOp.apply`.

        Raises
        ------
        util.CytoflowOpError
            If `statistic_name` is empty or unknown.

        Warns
        -----
        If any value of the newly created statistic is ``Undefined``.
        """
        self.function = self._lookup_function()

        ret = TransformStatisticOp.apply(self, experiment)

        stat = ret.statistics[(self.name, self.statistic_name)]

        # NOTE: ``Undefined in stat`` would test the *index labels* of a pandas
        # object, not its values, so the values are scanned explicitly.
        # (assumes the statistic is a pandas Series -- TODO confirm)
        if any(value is Undefined for value in stat.values):
            warn("One of the transformed values was Undefined. "
                 "Subsequent operations may fail. "
                 "Please report this as a bug! ")

        return ret

    def get_notebook_code(self, idx):
        """
        Return the notebook source that recreates this operation.

        Parameters
        ----------
        idx : int
            Position of this operation; the generated code defines ``op_<idx>``
            and ``ex_<idx>`` and applies the operation to ``ex_<idx - 1>``.

        Returns
        -------
        str
            An optional import line, the ``repr`` of an equivalent
            `TransformStatisticOp`, and the ``apply`` call.

        Raises
        ------
        util.CytoflowOpError
            If `statistic_name` is empty or unknown.
        """
        function = self._lookup_function()

        op = TransformStatisticOp()
        op.copy_traits(self, [x for x in op.copyable_trait_names() if x != 'fill'])

        # import line (if any) that the generated code needs for each transform
        fn_import = {"Mean" : "from numpy import mean",
                     "Median" : "from numpy import median",
                     "Geom.Mean" : None,
                     "Count" : None,
                     "Std.Dev" : "from numpy import std",
                     "Geom.SD" : None,
                     "SEM" : "from scipy.stats import sem",
                     "Geom.SEM" : None,
                     # the generated lambda refers to a bare ``mean``, which
                     # (as for "Mean") has to be imported from numpy
                     "Mean 95% CI" : "from numpy import mean",
                     "Geom.Mean 95% CI" : None,
                     "Sum" : "from numpy import sum",
                     "Proportion" : "from pandas import Series",
                     "Percentage" : "from pandas import Series",
                     "Fold" : "from pandas import Series"
                     }

        # source text that stands in for each transform's function
        # NOTE(review): names with no import above (geom_mean, ci, ...) are
        # presumably already in scope in the generated notebook -- confirm.
        fn_name = {"Mean" : "mean",
                   "Median" : "median",
                   "Geom.Mean" : "geom_mean",
                   "Count" : "len",
                   "Std.Dev" : "std",
                   "Geom.SD" : "geom_sd_range",
                   "SEM" : "sem",
                   "Geom.SEM" : "geom_sem_range",
                   "Mean 95% CI" : "lambda x: ci(x, mean, boots = 100)",
                   "Geom.Mean 95% CI" : "lambda x: ci(x, geom_mean, boots = 100)",
                   "Sum" : "sum",
                   "Proportion" : "lambda a: Series(a / a.sum())",
                   "Percentage" : "lambda a: Series(a / a.sum()) * 100.0",
                   "Fold" : "lambda a: Series(a / a.min())"
                   }

        op.function = function

        # Overwrite the function's __name__ with the text above.  This renames
        # the shared object stored in ``transform_functions``.
        # NOTE(review): presumably repr(op) prints the function by its
        # __name__ -- confirm against traits_repr.
        try:
            op.function.__name__ = fn_name[self.statistic_name]
        except AttributeError:
            # can't reassign the name of "len", for example
            pass

        # fn_import values are either None or a non-empty import statement
        import_statement = fn_import[self.statistic_name] or ""

        return "\n{import_statement}\nop_{idx} = {repr}\n\nex_{idx} = op_{idx}.apply(ex_{prev_idx})" \
            .format(import_statement = import_statement,
                    repr = repr(op),
                    idx = idx,
                    prev_idx = idx - 1)
### Serialization
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 1)
def _dump(op):
    """
    Build the version-1 serialized form of a `TransformStatisticWorkflowOp`:
    a dict holding the operation's name, statistic, statistic_name, by and
    subset_list attributes.
    """
    persisted = ("name", "statistic", "statistic_name", "by", "subset_list")
    return {trait: getattr(op, trait) for trait in persisted}
@camel_registry.loader('transform-statistic', version = 1)
def _load(data, version):
    """
    Rebuild a `TransformStatisticWorkflowOp` from its version-1 serialized
    dict.  ``data`` is updated in place before being used as keyword arguments.
    """
    # coerce the stored statistic to a tuple before handing it to the
    # constructor (presumably it is deserialized as a list -- confirm)
    data.update(statistic = tuple(data['statistic']))
    return TransformStatisticWorkflowOp(**data)