# Source code for cytoflowgui.workflow.operations.channel_stat

#!/usr/bin/env python3.8

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflowgui.workflow.operations.channel_stat
--------------------------------------------

"""

import logging
import numpy as np
import pandas as pd
import scipy.stats

from traits.api import (Str, Callable, Property, List, provides, observe)

import cytoflow.utility as util
from cytoflow import ChannelStatisticOp
                       
from cytoflowgui.workflow.serialization import camel_registry, cytoflow_class_repr
from .operation_base import IWorkflowOperation, WorkflowOperation

from ..subset import ISubset

# Bootstrapped confidence intervals (100 resamples) of the mean and of the
# geometric mean.  Not used by the table below; kept unchanged in case other
# modules import these names.
mean_95ci = lambda x: util.ci(x, np.mean, boots = 100)
geomean_95ci = lambda x: util.ci(x, util.geom_mean, boots = 100)


def _mean_sd(x):
    """Return a Series with the mean of ``x`` and the mean +/- one ``x.std()``."""
    center = x.mean()
    spread = x.std()
    return pd.Series({"Mean" : center,
                      "+SD" : center + spread,
                      "-SD" : center - spread})


def _geom_mean_sd(x):
    """Return a Series with the geometric mean of ``x`` and that value
    multiplied / divided by ``util.geom_sd(x)``."""
    center = util.geom_mean(x)
    spread = util.geom_sd(x)
    return pd.Series({"Geo.Mean" : center,
                      "*SD" : center * spread,
                      "/SD" : center / spread})


def _mean_sem(x):
    """Return a Series with the mean of ``x`` and the mean +/- ``scipy.stats.sem(x)``."""
    center = x.mean()
    spread = scipy.stats.sem(x)
    return pd.Series({"Mean" : center,
                      "+SEM" : center + spread,
                      "-SEM" : center - spread})


def _geom_mean_sem(x):
    """Return a Series with the geometric mean of ``x`` and that value
    multiplied / divided by ``util.geom_sem(x)``."""
    center = util.geom_mean(x)
    spread = util.geom_sem(x)
    return pd.Series({"Geo.Mean" : center,
                      "*SEM" : center * spread,
                      "/SEM" : center / spread})


def _mean_ci(x):
    """Return a Series with the mean of ``x`` and the two values returned by
    ``util.ci`` for the mean.

    ``util.ci`` is called only once, so "-CI" (element 0) and "+CI"
    (element 1) come from the same call instead of two separate ones.
    """
    interval = util.ci(x, lambda data: data.mean(), boots = 100)
    return pd.Series({"Mean" : x.mean(),
                      "-CI" : interval[0],
                      "+CI" : interval[1]})


def _geom_mean_ci(x):
    """Return a Series with the geometric mean of ``x`` and the two values
    returned by ``util.ci`` for the geometric mean.

    ``util.ci`` is called only once, so "-CI" (element 0) and "+CI"
    (element 1) come from the same call instead of two separate ones.
    """
    center = util.geom_mean(x)
    interval = util.ci(x, util.geom_mean, boots = 100)
    return pd.Series({"Geo.Mean" : center,
                      "-CI" : interval[0],
                      "+CI" : interval[1]})


# Maps the function name stored on the operation (``function_name``) to the
# callable that summarizes one group of events.  Entries that return a
# ``pd.Series`` produce several values per group, keyed by the Series index.
summary_functions = {"Mean" : np.mean,
                     "Mean +- SD" : _mean_sd,
                     "Geo.Mean" : util.geom_mean,
                     "Geo.Mean */ SD" : _geom_mean_sd,
                     "Median" : np.median,
                     "Count" : len,
                     "Std.Dev" : np.std,
                     "Geo.SD" : util.geom_sd,
                     "SEM" : scipy.stats.sem,
                     "Geo.SEM" : util.geom_sem,
                     "Mean +- SEM" : _mean_sem,
                     "Geo.Mean */ SEM" : _geom_mean_sem,
                     "Mean & 95% CI" : _mean_ci,
                     "Geo.Mean & 95% CI" : _geom_mean_ci,
                     }

# fill = {"Mean +- SD" : 0,
#         "Geo.Mean */ SD" : 0,
#         "Median" : 0,
#         "Count" : 0,
#         "Std.Dev" : 0,
#         "Geo.SD" : 0,
#         "SEM" : 0,
#         "Mean +- SEM" : 0,
#         "Geo.Mean */ SEM" : 0,
#         "Mean +- 95% CI" : 0,
#         "Geo.Mean */ 95% CI" : 0
#         }

# get_notebook_code() below emits repr(op) for a plain ChannelStatisticOp, so
# give that class the repr helper imported from the serialization module.
ChannelStatisticOp.__repr__ = cytoflow_class_repr

@provides(IWorkflowOperation)
class ChannelStatisticWorkflowOp(WorkflowOperation, ChannelStatisticOp):
    """
    Workflow wrapper around `ChannelStatisticOp`.

    The summary function is chosen by name (``function_name``) and looked up
    in the module-level ``summary_functions`` table when the operation is
    applied; the callable itself is transient.
    """

    # operation traits
    name = Str(apply = True)
    channel = Str(apply = True)
    function_name = Str(apply = True)
    by = List(Str, apply = True)

    # override the base class's "subset" with one that is dynamically generated /
    # updated from subset_list
    subset = Property(Str, observe = "subset_list.items.str")
    subset_list = List(ISubset, apply = True)

    # functions aren't picklable, so make this one transient
    # and send the name instead
    function = Callable(transient = True)

    # automatically pick a good fill
    # fill = Property(Any, observe = 'function_name', transient = True)
    fill = 0

    # MAGIC - returns the value of the 'fill' property
    # def _get_fill(self):
    #     if self.function_name:
    #         return fill[self.function_name]
    #     else:
    #         return 0

    # bits to support the subset editor
    @observe('subset_list:items.str')
    def _on_subset_changed(self, _):
        """Flag 'subset_list' as changed when any subset's ``str`` changes."""
        self.changed = 'subset_list'

    # MAGIC - returns the value of the "subset" Property, above
    def _get_subset(self):
        """Join the non-empty ``str`` of every subset with " and "."""
        return " and ".join([subset.str for subset in self.subset_list if subset.str])

    def apply(self, experiment):
        """
        Resolve ``function_name`` to a callable and apply the operation.

        Parameters
        ----------
        experiment
            Passed through unchanged to `ChannelStatisticOp.apply`.

        Returns
        -------
        Whatever `ChannelStatisticOp.apply` returns.

        Raises
        ------
        util.CytoflowOpError
            If ``function_name`` is empty.
        KeyError
            If ``function_name`` is not a key of ``summary_functions``.
        """
        if not self.function_name:
            raise util.CytoflowOpError('function_name', "Summary function isn't set")

        self.function = summary_functions[self.function_name]

        return ChannelStatisticOp.apply(self, experiment)

    def clear_estimate(self):
        """Do nothing."""
        # no-op
        return

    def get_notebook_code(self, idx):
        """
        Return the source text of a notebook cell that recreates this operation.

        The cell contains any import statement the chosen summary function
        needs, an assignment of the operation's ``repr`` to ``op_{idx}``, and
        a call applying it to ``ex_{idx - 1}`` to produce ``ex_{idx}``.

        Parameters
        ----------
        idx : int
            Position of this operation; used to name the generated variables.

        Raises
        ------
        KeyError
            If ``function_name`` is not a key of ``summary_functions``.
        """
        op = ChannelStatisticOp()
        op.copy_traits(self, op.copyable_trait_names())

        # Imports required by the emitted function text.  Functions with no
        # entry here get no import line.
        # NOTE(review): the emitted text also uses bare geom_mean / geom_sd /
        # geom_sem / ci -- presumably the notebook already has those in scope;
        # confirm against the notebook header.
        fn_import = {"Mean" : "import numpy as np",
                     "Mean +- SD" : "import pandas as pd",
                     "Geo.Mean */ SD" : "import pandas as pd",
                     "Median" : "import numpy as np",
                     "Std.Dev" : "import numpy as np",
                     "SEM" : "import scipy.stats",
                     "Mean +- SEM" : "import scipy.stats\nimport pandas as pd",
                     "Geo.Mean */ SEM" : "import pandas as pd",
                     "Mean & 95% CI" : "import pandas as pd",
                     # previously missing: the emitted lambda builds a pd.Series
                     "Geo.Mean & 95% CI" : "import pandas as pd",
                     }

        # Source text written into the notebook in place of the function's name.
        fn_repr = {"Mean" : 'np.mean',
                   "Mean +- SD" : 'lambda x: pd.Series({"Mean" : x.mean(), "+SD" : x.mean() + x.std(), "-SD" : x.mean() - x.std()})',
                   "Geo.Mean */ SD" : 'lambda x: pd.Series({"Geo.Mean" : geom_mean(x), "*SD" : geom_mean(x) * geom_sd(x), "/SD" : geom_mean(x) / geom_sd(x)})',
                   "Median" : 'np.median',
                   "Std.Dev" : 'np.std',
                   "SEM" : 'scipy.stats.sem',
                   "Mean +- SEM" : 'lambda x: pd.Series({"Mean" : x.mean(), "+SEM" : x.mean() + scipy.stats.sem(x), "-SEM" : x.mean() - scipy.stats.sem(x)})',
                   "Geo.Mean */ SEM" : 'lambda x: pd.Series({"Geo.Mean" : geom_mean(x), "*SEM" : geom_mean(x) * geom_sem(x), "/SEM" : geom_mean(x) / geom_sem(x)})',
                   "Mean & 95% CI" : 'lambda x: pd.Series({"Mean" : x.mean(), "-CI" : ci(x, lambda x: x.mean(), boots = 100)[0], "+CI" : ci(x, lambda x: x.mean(), boots = 100)[1]})',
                   "Geo.Mean & 95% CI" : 'lambda x: pd.Series({"Geo.Mean" : geom_mean(x), "-CI" : ci(x, geom_mean, boots = 100)[0], "+CI" : ci(x, geom_mean, boots = 100)[1]})',
                   }

        op.function = summary_functions[self.function_name]

        # Overwrite the function's __name__ with the text to emit.
        # NOTE(review): this renames the shared object stored in
        # summary_functions (e.g. np.mean itself), not a copy.
        try:
            if self.function_name in fn_repr:
                op.function.__name__ = fn_repr[self.function_name]
        except AttributeError:
            # this doesn't work for builtins like "len"
            pass

        return "\n{import_statement}\nop_{idx} = {repr}\n\nex_{idx} = op_{idx}.apply(ex_{prev_idx})"\
            .format(import_statement = fn_import.get(self.function_name, ""),
                    repr = repr(op),
                    idx = idx,
                    prev_idx = idx - 1)
### Serialization

@camel_registry.dumper(ChannelStatisticWorkflowOp, 'channel-statistic', version = 2)
def _dump(op):
    """Serialize the version-2 fields of a ChannelStatisticWorkflowOp to a dict."""
    return dict(name = op.name,
                channel = op.channel,
                function_name = op.function_name,
                by = op.by,
                subset_list = op.subset_list)


@camel_registry.dumper(ChannelStatisticWorkflowOp, 'channel-statistic', version = 1)
def _dump_v1(op):
    """Serialize the version-1 fields, which use ``statistic_name`` instead of
    ``function_name``."""
    return dict(name = op.name,
                channel = op.channel,
                statistic_name = op.statistic_name,
                by = op.by,
                subset_list = op.subset_list)


@camel_registry.loader('channel-statistic', version = 2)
def _load(data, version):
    """Rebuild a ChannelStatisticWorkflowOp from a version-2 dict."""
    return ChannelStatisticWorkflowOp(**data)


@camel_registry.loader('channel-statistic', version = 1)
def _load_v1(data, version):
    """Rebuild a ChannelStatisticWorkflowOp from a version-1 dict.

    The ``statistic_name`` entry is discarded and a warning is logged telling
    the user that some settings must be reset.
    """
    # pop() rather than del, so a v1 record without the key still loads
    data.pop("statistic_name", None)

    # logging.warn is a deprecated alias of logging.warning
    logging.warning("Statistics have changed substantially since you saved this "
                    ".flow file, so you'll need to reset a few things. "
                    "See the FAQ in the online documentation for details.")
    return ChannelStatisticWorkflowOp(**data)