# Source code for cytoflowgui.workflow.operations.xform_stat

#!/usr/bin/env python3.8

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflowgui.workflow.operations.xform_stat
------------------------------------------

"""

import numpy as np
import scipy.stats
import pandas
from warnings import warn

from traits.api import (Str, Callable, Property, List, provides, observe, 
                        Undefined, Bool)  # @UnresolvedImport

import cytoflow.utility as util
from cytoflow import TransformStatisticOp
                       
from cytoflowgui.workflow.serialization import camel_registry, cytoflow_class_repr
from .operation_base import IWorkflowOperation, WorkflowOperation

from ..subset import ISubset

# Install the GUI's repr implementation on the core operation class.
# get_notebook_code() in this module emits repr(op) for a TransformStatisticOp,
# so this assignment determines what ends up in exported notebook code.
TransformStatisticOp.__repr__ = cytoflow_class_repr

# Thin wrappers that call util.ci with a fixed summary function and boots = 100.
# NOTE(review): the names suggest a bootstrapped 95% confidence interval --
# confirm against util.ci.  Neither wrapper is referenced by
# transform_functions below; presumably they are used elsewhere.
mean_95ci = lambda x: util.ci(x, np.mean, boots = 100)
geomean_95ci = lambda x: util.ci(x, util.geom_mean, boots = 100)

# Maps a function name (the value stored in the workflow op's "function_name"
# trait) to the callable that apply() installs as the op's "function".
# The same keys are used to look up import / repr strings when generating
# notebook code, so a key added here needs matching entries there.
transform_functions = {"Mean" : np.mean,
                       "Median" : np.median,
                       "Geom.Mean" : util.geom_mean,
                       "Count" : len,
                       "Std.Dev" : np.std,
                       "Geom.Std.Dev" : util.geom_sd,
                       "SEM" : scipy.stats.sem,
                       "Sum" : np.sum,
                       # The remaining entries divide the input by one of its
                       # own reductions and wrap the result in a pandas.Series.
                       # Assumes "a" supports element-wise division and
                       # .sum() / .min() (e.g. a Series or ndarray) -- TODO confirm.
                       "Proportion" : lambda a: pandas.Series(a / a.sum()),
                       "Percentage" : lambda a: pandas.Series(a / a.sum()) * 100.0,
                       "Fold" : lambda a: pandas.Series(a / a.min())
                       }



# [docs]  (Sphinx source-link marker left over from HTML extraction)
@provides(IWorkflowOperation)
class TransformStatisticWorkflowOp(WorkflowOperation, TransformStatisticOp):
    """
    Workflow (GUI-side) wrapper around `TransformStatisticOp`.

    The transform is selected by name (``function_name``) rather than by a
    callable; `apply` looks the name up in the module-level
    ``transform_functions`` table and sets ``function`` just before
    delegating to `TransformStatisticOp.apply`.

    The ``subset`` trait is a read-only property assembled from
    ``subset_list``.
    """

    # NOTE(review): "apply = True" is trait metadata; presumably it marks
    # traits whose changes require the operation to be re-applied -- confirm
    # against WorkflowOperation.
    name = Str(apply = True)
    statistic = Str(apply = True)
    feature = Str(apply = True)
    function_name = Str(apply = True)
    by = List(Str, apply = True)
    ignore_incomplete_groups = Bool(True, apply = True)

    # override the base class's "subset" with one that is dynamically generated /
    # updated from subset_list
    subset = Property(Str, observe = "subset_list.items.str")
    subset_list = List(ISubset, apply = True)

    # functions aren't picklable, so send the name instead
    function = Callable(transient = True)

    # plain class attribute; get_notebook_code() deliberately skips copying
    # 'fill' onto the TransformStatisticOp it builds
    fill = 0

    # bits to support the subset editor
    @observe('subset_list:items.str')
    def _on_subset_changed(self, _):
        """Signal a 'subset_list' change whenever any subset's ``str`` changes."""
        self.changed = 'subset_list'

    # MAGIC - returns the value of the "subset" Property, above
    def _get_subset(self):
        """Join the non-empty ``str`` of every subset with ``" and "``."""
        return " and ".join([subset.str for subset in self.subset_list if subset.str])

    def apply(self, experiment):
        """
        Resolve ``function_name`` to a callable and apply the transform.

        Parameters
        ----------
        experiment
            Passed unchanged to `TransformStatisticOp.apply`.

        Returns
        -------
        The object returned by `TransformStatisticOp.apply`.

        Raises
        ------
        util.CytoflowOpError
            If ``function_name`` is empty, or is not a key of
            ``transform_functions``.
        """
        if not self.function_name:
            raise util.CytoflowOpError("Transform function not set")

        # Report an unrecognised name with the same exception type as the
        # "not set" case instead of leaking a bare KeyError.
        try:
            self.function = transform_functions[self.function_name]
        except KeyError:
            raise util.CytoflowOpError("Unknown transform function '{}'"
                                       .format(self.function_name)) from None

        ret = TransformStatisticOp.apply(self, experiment)

        stat = ret.statistics[self.name]

        # NOTE(review): if "stat" is a pandas object, "in" tests its index /
        # column labels rather than its values -- confirm this check does
        # what is intended.
        if Undefined in stat:
            warn("One of the transformed values was Undefined. "
                 "Subsequent operations may fail. "
                 "Please report this as a bug! ")

        return ret

    def clear_estimate(self):
        """Do nothing; this operation keeps no estimate to clear."""
        # no-op
        return

    def get_notebook_code(self, idx):
        """
        Return notebook source text that recreates and applies this operation.

        Parameters
        ----------
        idx : int
            Index used in the generated variable names: the operation is
            bound to ``op_<idx>`` and applied to ``ex_<idx - 1>`` to produce
            ``ex_<idx>``.

        Returns
        -------
        str
            An optional import line, the ``repr`` of an equivalent
            `TransformStatisticOp`, and the ``apply`` call.

        Raises
        ------
        KeyError
            If ``function_name`` is not a key of ``transform_functions``.
        """
        op = TransformStatisticOp()
        op.copy_traits(self, [x for x in op.copyable_trait_names() if x != 'fill'])

        # Import line needed by the generated code for each function name.
        # None means no extra import is emitted for that function.
        fn_import = {"Mean" : "import numpy as np",
                     "Median" : "import numpy as np",
                     "Std.Dev" : "from numpy import std",
                     "Geom.Std.Dev" : None,
                     "Geom.SD" : None,
                     "SEM" : "from scipy.stats import sem",
                     "Geom.SEM" : None,
                     "Mean 95% CI" : None,
                     "Geom.Mean 95% CI" : None,
                     "Sum" : "from numpy import sum",
                     "Proportion" : "from pandas import Series",
                     "Percentage" : "from pandas import Series",
                     "Fold" : "from pandas import Series"
                  }

        # Source text that should stand in for the callable in the output.
        fn_repr = {"Mean" : "np.mean",
                   "Median" : "np.median",
                   "Geom.Mean" : "geom_mean",
                   "Count" : "len",
                   "Std.Dev" : "std",
                   "Geom.Std.Dev" : "geom_sd",
                   "SEM" : "sem",
                   "Sum" : "sum",
                   "Proportion" : "lambda a: Series(a / a.sum())",
                   "Percentage" : "lambda a: Series(a / a.sum()) * 100.0",
                   "Fold" : "lambda a: Series(a / a.min())"
                   }

        op.function = transform_functions[self.function_name]
        display_name = fn_repr[self.function_name]

        # NOTE(review): this renames the shared callable stored in
        # transform_functions (e.g. numpy's own mean), not a copy, so the
        # new __name__ is visible process-wide.  Presumably the op's repr
        # prints function.__name__ -- confirm before changing.
        try:
            op.function.__name__ = display_name
        except AttributeError:
            # can't reassign the name of "len", for example
            pass

        # A missing key and an explicit None both mean "no import line";
        # formatting None directly would put the text "None" in the output.
        import_statement = fn_import.get(self.function_name) or ""

        return "\n{import_statement}\nop_{idx} = {repr}\n\nex_{idx} = op_{idx}.apply(ex_{prev_idx})" \
            .format(import_statement = import_statement,
                    repr = repr(op),
                    idx = idx,
                    prev_idx = idx - 1)


            
### Serialization
@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 4)
def _dump_v4(op):
    """
    Serialize *op* in the version 4 'transform-statistic' layout.

    Returns a dict holding the op's name, statistic, feature, function_name,
    ignore_incomplete_groups, by and subset_list, in that order.
    """
    persisted = ('name',
                 'statistic',
                 'feature',
                 'function_name',
                 'ignore_incomplete_groups',
                 'by',
                 'subset_list')
    return {trait: getattr(op, trait) for trait in persisted}

@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 3)
def _dump_v3(op):
    """
    Serialize *op* in the version 3 'transform-statistic' layout.

    Returns a dict holding the op's name, statistic, feature, function_name,
    by and subset_list, in that order.
    """
    return {
        'name': op.name,
        'statistic': op.statistic,
        'feature': op.feature,
        'function_name': op.function_name,
        'by': op.by,
        'subset_list': op.subset_list,
    }

@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 2)
def _dump_v2(op):
    """
    Serialize *op* in the version 2 'transform-statistic' layout.

    Returns a dict holding the op's name, statistic, function_name, by and
    subset_list, in that order.
    """
    persisted = ('name', 'statistic', 'function_name', 'by', 'subset_list')
    return {trait: getattr(op, trait) for trait in persisted}

@camel_registry.dumper(TransformStatisticWorkflowOp, 'transform-statistic', version = 1)
def _dump_v1(op):
    """
    Serialize *op* in the version 1 'transform-statistic' layout.

    In this layout the op's function_name is written under the key
    'statistic_name'; the other keys are name, statistic, by and subset_list.
    """
    return {
        'name': op.name,
        'statistic': op.statistic,
        # version 1 key for what is now the "function_name" trait
        'statistic_name': op.function_name,
        'by': op.by,
        'subset_list': op.subset_list,
    }
    
@camel_registry.loader('transform-statistic', version = any)
def _load(data, version):
    """
    Build a TransformStatisticWorkflowOp from serialized *data*.

    Registered with ``version = any``.  Every key of *data* is passed to the
    constructor as a keyword argument; *version* is not used.
    """
    op = TransformStatisticWorkflowOp(**data)
    return op

@camel_registry.loader('transform-statistic', version = 1)
def _load_v1(data, version):
    """
    Build a TransformStatisticWorkflowOp from version 1 serialized *data*.

    Two keys are converted before the constructor is called:

    * 'statistic' held a sequence; only its first element is kept.
    * 'statistic_name' is where the version 1 dumper wrote the op's
      function_name.  It is moved to 'function_name' when it names an entry
      of ``transform_functions``; otherwise it is dropped, which leaves the
      transform function unset.

    *version* is not used.
    """
    data['statistic'] = tuple(data['statistic'])[0]

    # Carry the transform function over instead of discarding it.  Names that
    # are no longer in the table are dropped rather than loaded as-is.
    function_name = data.pop('statistic_name', None)
    if function_name in transform_functions:
        data['function_name'] = function_name

    # TODO - some warning about how stats have changed.
    return TransformStatisticWorkflowOp(**data)