Source code for cytoflowgui.workflow.serialization

#!/usr/bin/env python3.8
# coding: latin-1

# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2022
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
cytoflowgui.workflow.serialization
----------------------------------

Utility bits that let us use `camel` to serialize a `RemoteWorkflow`.  

Many of the dumpers and loaders support serializing `pandas` types,
such as `pandas.Series` and `pandas.DataFrame`; the helpers at the end
of the module are for testing serialization with unit tests.
"""

import pandas, numpy
from pandas.api.types import CategoricalDtype

from traits.api import DelegationError

from textwrap import dedent  # @UnusedImport

#### YAML serialization

from camel import Camel, CamelRegistry, YAML_TAG_PREFIX

# the camel registry singletons
camel_registry = CamelRegistry()
standard_types_registry = CamelRegistry(tag_prefix = YAML_TAG_PREFIX)

def load_yaml(path):
    """
    Read a YAML file and rebuild the Python object stored in it.

    Parameters
    ----------
    path : string
        Location of the YAML file to read.

    Returns
    -------
    object
        Whatever object `camel` reconstructs from the file's contents,
        using the loaders registered on ``camel_registry``.
    """

    with open(path, 'r') as yaml_file:
        yaml_text = yaml_file.read()
        loaded = Camel([camel_registry]).load(yaml_text)

    return loaded
def save_yaml(data, path, lock_versions = None):
    """
    Save a Python object to a YAML file.

    Parameters
    ----------
    data : object
        The Python object to serialize

    path : string
        The path to save to

    lock_versions : dict, optional
        A dictionary of types and versions of dumpers to use when
        serializing.  ``None`` (the default) locks no versions, which is
        the same as passing an empty dictionary.
    """

    # avoid a shared mutable default argument
    if lock_versions is None:
        lock_versions = {}

    c = Camel([standard_types_registry, camel_registry])
    for klass, version in lock_versions.items():
        c.lock_version(klass, version)

    # serialize BEFORE opening the file: opening with 'w' truncates it, so
    # a failure while dumping would otherwise wipe out an existing file.
    yaml_text = c.dump(data)

    with open(path, 'w') as f:
        f.write(yaml_text)
# camel adapters for traits lists and dicts, numpy types
from numpy import float64, int64, bool_

@standard_types_registry.dumper(float64, 'float', version = None)
def _dump_float(fl):
    """Dump a numpy ``float64`` as the lower-cased repr of a Python float."""
    return repr(float(fl)).lower()

@standard_types_registry.dumper(int64, 'int', version = None)
def _dump_int(i):
    """Dump a numpy ``int64`` as the lower-cased repr of a Python int."""
    return repr(int(i)).lower()

@standard_types_registry.dumper(bool_, 'bool', version = None)
def _dump_bool(b):
    """Dump a numpy ``bool_`` as the lower-cased repr of a Python bool."""
    return repr(bool(b)).lower()

from traits.trait_handlers import TraitListObject, TraitDictObject
from traits.api import Undefined

@standard_types_registry.dumper(TraitListObject, 'seq', version = None)
def _dump_list(tlo):
    """Dump a traits list object as a plain list."""
    return list(tlo)

@standard_types_registry.dumper(TraitDictObject, 'map', version = None)
def _dump_dict(tdo):
    """Dump a traits dict object as a plain dict."""
    return dict(tdo)

# for some reason, the version of this in camel.__init__ doesn't get called.
# if we re-define it here, everything is fine.
@standard_types_registry.dumper(tuple, 'python/tuple', version=None)
def _dump_tuple(data):
    """Dump a tuple as a list; `_load_tuple` converts it back."""
    return list(data)

@standard_types_registry.loader('python/tuple', version=None)
def _load_tuple(data, version):
    """Rebuild a tuple from the sequence written by `_dump_tuple`."""
    return tuple(data)

# @standard_types_registry.dumper(TraitTuple, 'python/tuple', version = None)
# def _dump_tuple(tt):
#     return list(tt)

@camel_registry.dumper(Undefined.__class__, 'undefined', version = 1)
def _dump_undef(ud):
    """Dump the traits ``Undefined`` singleton as a fixed marker string."""
    return "Undefined"

@camel_registry.loader('undefined', version = 1)
def _load_undef(data, version):
    """Load the traits ``Undefined`` singleton (the payload is ignored)."""
    return Undefined

@camel_registry.dumper(numpy.dtype, 'numpy-dtype', version = 1)
def _dump_dtype(d):
    """Dump a ``numpy.dtype`` as its string form."""
    return str(d)

@camel_registry.loader('numpy-dtype', version = 1)
def _load_dtype(data, version):
    """Rebuild a ``numpy.dtype`` from its string form."""
    return numpy.dtype(data)

@camel_registry.dumper(CategoricalDtype, 'pandas-categorical-dtype', version = 1)
def _dump_categorical_dtype(d):
    """Dump a ``CategoricalDtype`` as its categories and ordered flag."""
    return dict(categories = list(d.categories),
                ordered = d.ordered)

@camel_registry.loader('pandas-categorical-dtype', version = 1)
def _load_categorical_dtype(data, version):
    """Rebuild a ``CategoricalDtype`` from its categories and ordered flag."""
    return CategoricalDtype(categories = data['categories'],
                            ordered = data['ordered'])

# Version 1 of the multiindex format stores the codes under the key 'labels';
# version 2 stores them under 'codes'.  Both read ``d.codes`` when dumping.
@camel_registry.dumper(pandas.MultiIndex, 'pandas-multiindex', version = 1)
def _dump_multiindex_v1(d):
    """Dump a ``pandas.MultiIndex`` (v1 format: codes stored as 'labels')."""
    return dict(levels = list(d.levels),
                labels = [x.tolist() for x in d.codes],
                names = list(d.names))

@camel_registry.dumper(pandas.MultiIndex, 'pandas-multiindex', version = 2)
def _dump_multiindex_v2(d):
    """Dump a ``pandas.MultiIndex`` (v2 format: codes stored as 'codes')."""
    return dict(levels = list(d.levels),
                codes = [x.tolist() for x in d.codes],
                names = list(d.names))

@camel_registry.loader('pandas-multiindex', version = 1)
def _load_multiindex_v1(data, version):
    """Rebuild a ``pandas.MultiIndex`` from the v1 ('labels') format."""
    return pandas.MultiIndex(levels = data['levels'],
                             codes = data['labels'],
                             names = data['names'])

@camel_registry.loader('pandas-multiindex', version = 2)
def _load_multiindex_v2(data, version):
    """Rebuild a ``pandas.MultiIndex`` from the v2 ('codes') format."""
    return pandas.MultiIndex(levels = data['levels'],
                             codes = data['codes'],
                             names = data['names'])

@camel_registry.dumper(pandas.Index, 'pandas-index', version = 1)
def _dump_index(d):
    """Dump a ``pandas.Index`` as its name, values and dtype string."""
    return dict(name = d.name,
                values = d.values.tolist(),
                dtype = str(d.dtype))

@camel_registry.loader('pandas-index', version = 1)
def _load_index(data, version):
    """Rebuild a ``pandas.Index`` from its name, values and dtype string."""
    return pandas.Index(name = data['name'],
                        data = data['values'],
                        dtype = data['dtype'])

# pandas.Int64Index and pandas.Float64Index were removed in pandas 2.0.
# Referencing them unconditionally at module level makes this whole module
# fail to import there, so look them up defensively.  When they exist, the
# behavior below is the same as before.
_Int64Index = getattr(pandas, 'Int64Index', None)
_Float64Index = getattr(pandas, 'Float64Index', None)

if _Int64Index is not None:
    @camel_registry.dumper(_Int64Index, 'pandas-int64index', version = 1)
    def _dump_int64index(d):
        """Dump a ``pandas.Int64Index`` as its name and values."""
        return dict(name = d.name,
                    values = d.values.tolist())

@camel_registry.loader('pandas-int64index', version = 1)
def _load_int64index(data, version):
    """
    Rebuild an integer index from its name and values.

    Uses ``pandas.Int64Index`` when this pandas still provides it, and an
    int64-typed ``pandas.Index`` otherwise, so previously-saved files
    remain loadable.
    """
    if _Int64Index is not None:
        return _Int64Index(name = data['name'],
                           data = data['values'])
    return pandas.Index(name = data['name'],
                        data = data['values'],
                        dtype = 'int64')

if _Float64Index is not None:
    @camel_registry.dumper(_Float64Index, 'pandas-float64index', version = 1)
    def _dump_float64index(d):
        """Dump a ``pandas.Float64Index`` as its name and values."""
        return dict(name = d.name,
                    values = d.values.tolist())

@camel_registry.loader('pandas-float64index', version = 1)
def _load_float64index(data, version):
    """
    Rebuild a floating-point index from its name and values.

    Uses ``pandas.Float64Index`` when this pandas still provides it, and a
    float64-typed ``pandas.Index`` otherwise, so previously-saved files
    remain loadable.
    """
    if _Float64Index is not None:
        return _Float64Index(name = data['name'],
                             data = data['values'])
    return pandas.Index(name = data['name'],
                        data = data['values'],
                        dtype = 'float64')

@camel_registry.dumper(pandas.CategoricalIndex, 'pandas-categoricalindex', version = 1)
def _dump_categoricalindex(d):
    """Dump a ``pandas.CategoricalIndex``: name, values, categories, ordered."""
    return dict(name = d.name,
                values = d.values.tolist(),
                categories = d.categories.values.tolist(),
                ordered = d.ordered)

@camel_registry.loader('pandas-categoricalindex', version = 1)
def _load_categoricalindex(data, version):
    """Rebuild a ``pandas.CategoricalIndex`` from the dumped fields."""
    return pandas.CategoricalIndex(name = data['name'],
                                   data = data['values'],
                                   categories = data['categories'],
                                   ordered = data['ordered'])

# Three generations of the series format:
#   v1: index and data only (no dtype)
#   v2: adds the dtype, dumped as the dtype object itself
#   v3: adds the dtype, dumped as a string
@camel_registry.dumper(pandas.Series, 'pandas-series', version = 1)
def _dump_series_v1(s):
    """Dump a ``pandas.Series`` (v1 format: index and data, no dtype)."""
    return dict(index = list(s.index),
                data = list(s.values))

@camel_registry.dumper(pandas.Series, 'pandas-series', version = 2)
def _dump_series_v2(s):
    """Dump a ``pandas.Series`` (v2 format: dtype stored as an object)."""
    return dict(index = s.index,
                data = s.values.tolist(),
                dtype = s.dtype)

@camel_registry.dumper(pandas.Series, 'pandas-series', version = 3)
def _dump_series(s):
    """Dump a ``pandas.Series`` (v3 format: dtype stored as a string)."""
    return dict(index = s.index,
                data = s.values.tolist(),
                dtype = str(s.dtype))

@camel_registry.loader('pandas-series', version = 1)
def _load_series_v1(data, version):
    """
    Rebuild a ``pandas.Series`` from the v1 format.

    The v1 format has no dtype, so pandas infers one; if that inference
    yields ``object``, the series is rebuilt with dtype ``"category"``.
    """
    ret = pandas.Series(data = data['data'],
                        index = data['index'])

    if str(ret.dtype) == 'object':
        ret = pandas.Series(data = data['data'],
                            index = data['index'],
                            dtype = "category")
    return ret

@camel_registry.loader('pandas-series', version = 2)
def _load_series_v2(data, version):
    """Rebuild a ``pandas.Series`` from the v2 format."""
    return pandas.Series(data = data['data'],
                         index = data['index'],
                         dtype = data['dtype'])

@camel_registry.loader('pandas-series', version = 3)
def _load_series_v3(data, version):
    """Rebuild a ``pandas.Series`` from the v3 format."""
    return pandas.Series(data = data['data'],
                         index = data['index'],
                         dtype = data['dtype'])

# a few bits for testing serialization
def traits_eq(self, other):
    """
    Compare two `traits.has_traits.HasTraits` by their copyable traits.

    The trait names are always taken from ``self``; the values of those
    traits on ``self`` and on ``other`` are then compared for equality.
    """
    mine = self.trait_get(self.copyable_trait_names())
    theirs = other.trait_get(self.copyable_trait_names())
    return mine == theirs
def traits_hash(self):
    """
    Hash a `traits.has_traits.HasTraits` from its copyable traits.

    The hash is computed over the tuple of ``(name, value)`` pairs of the
    object's copyable traits.
    """
    names = self.copyable_trait_names()
    name_value_pairs = self.trait_get(names).items()
    return hash(tuple(name_value_pairs))
# set underlying cytoflow repr
def traits_repr(obj):
    """
    A uniform implementation of **__repr__** for `traits.has_traits.HasTraits`

    Produces the object's class name followed by the output of
    `traits_str` wrapped in parentheses.
    """
    class_name = obj.__class__.__name__
    trait_summary = traits_str(obj)
    return class_name + '(' + trait_summary + ')'
def traits_str(obj):
    """
    A uniform implementation of **__str__** for `traits.has_traits.HasTraits`

    Builds a comma-separated ``name = value`` listing of the object's
    traits, skipping traits whose ``transient`` or ``status`` metadata is
    ``True``, traits of type ``'delegate'``, the trait named ``op``, and
    any trait whose value equals the value on a freshly constructed
    instance of the same class.

    Callable values are shown by their ``__name__`` when they have one
    and by their ``repr`` otherwise; all other values are shown by
    their ``repr``.

    Returns
    -------
    str
        The listing, or ``'<ClassName>(<Delegation error>)'`` if a
        `traits.api.DelegationError` is raised while reading the traits.
    """

    # the same metadata filter is applied to the object and to the defaults
    metadata = dict(transient = lambda x: x is not True,
                    status = lambda x: x is not True,
                    type = lambda x: x != 'delegate')

    try:
        traits = obj.trait_get(**metadata)
        traits.pop('op', None)

        # filter out traits that haven't changed, by comparing against a
        # default-constructed instance of the same class
        default_traits = obj.__class__().trait_get(**metadata)

        parts = []
        for k, v in traits.items():
            if k in default_traits and not (v != default_traits[k]):
                continue

            if callable(v):
                # not every callable has a __name__ (functools.partial
                # objects and instances defining __call__ don't), so fall
                # back to the repr instead of raising AttributeError
                name = getattr(v, '__name__', None)
                if name is None:
                    name = repr(v)
                # %s uses the str function
                parts.append("%s = %s" % (k, name))
            else:
                # %r uses the repr function
                parts.append("%s = %r" % (k, v))

        return ', '.join(parts)

    except DelegationError:
        return obj.__class__.__name__ + '(<Delegation error>)'