#!/usr/bin/env python3.4
# coding: latin-1
# (c) Massachusetts Institute of Technology 2015-2018
# (c) Brian Teague 2018-2021
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
cytoflow.views.radviz
---------------------
"""
from traits.api import provides, Constant
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import scipy.spatial.distance
import pandas as pd
import numpy as np
import cytoflow.utility as util
from .i_view import IView
from .base_views import BaseNDView
[docs]@provides(IView)
class RadvizView(BaseNDView):
"""
Plots a Radviz plot. Radviz plots project multivariate plots into
two dimensions. Good for looking for clusters.
Attributes
----------
Notes
-----
The Radviz plot is based on a method of "dimensional anchors" [#f1]_.
The variables are conceived as points equidistant around a unit circle,
and each data point connected to each anchor by a spring whose stiffness
corresponds to the value of that data point. The location of the data
point is the location where springs' tensions are minimized. Fortunately,
there is fast matrix math to do this.
As per [#f2]_, the order of the anchors can make a huge difference. I've
adapted the code from the R ``radviz`` package [#f3]_ to compute the cosine
similarity of all possible circular permutations ("necklaces"). For a
moderate number of anchors such as is likely to be encountered here,
computing them all is completely feasible.
References
----------
.. [#f1] Hoffman P, Grinstein G, Pinkney D. Dimensional anchors: a graphic
primitive for multidimensional multivariate information visualizations.
Proceedings of the 1999 workshop on new paradigms in information
visualization and manipulation in conjunction with the eighth ACM
internation conference on Information and knowledge management.
1999 Nov 1 (pp. 9-16). ACM.
.. [#f2] Di Caro L, Frias-Martinez V, Frias-Martinez E. Analyzing the role
of dimension arrangement for data visualization in radviz. Advances in
Knowledge Discovery and Data Mining. 2010:125-32.
.. [#f3] https://github.com/yannabraham/Radviz
Examples
--------
Make a little data set.
.. plot::
:context: close-figs
>>> import cytoflow as flow
>>> import_op = flow.ImportOp()
>>> import_op.tubes = [flow.Tube(file = "Plate01/RFP_Well_A3.fcs",
... conditions = {'Dox' : 10.0}),
... flow.Tube(file = "Plate01/CFP_Well_A4.fcs",
... conditions = {'Dox' : 1.0})]
>>> import_op.conditions = {'Dox' : 'float'}
>>> ex = import_op.apply()
Plot the radviz.
.. plot::
:context: close-figs
>>> flow.RadvizView(channels = ['B1-A', 'V2-A', 'Y2-A'],
... scale = {'Y2-A' : 'log',
... 'V2-A' : 'log',
... 'B1-A' : 'log'},
... huefacet = 'Dox').plot(ex)
"""
id = Constant('edu.mit.synbio.cytoflow.view.radviz')
friend_id = Constant("Radviz Plot")
[docs] def plot(self, experiment, **kwargs):
"""
Plot a faceted Radviz plot
Parameters
----------
alpha : float (default = 0.25)
The alpha blending value, between 0 (transparent) and 1 (opaque).
s : int (default = 2)
The size in points^2.
marker : a matplotlib marker style, usually a string
Specfies the glyph to draw for each point on the scatterplot.
See `matplotlib.markers <http://matplotlib.org/api/markers_api.html#module-matplotlib.markers>`_ for examples. Default: 'o'
Notes
-----
Other ``kwargs`` are passed to `matplotlib.pyplot.scatter <https://matplotlib.org/devdocs/api/_as_gen/matplotlib.pyplot.scatter.html>`_
"""
if len(self.channels) < 3:
raise util.CytoflowViewError('channels',
"Must have at least 3 channels")
super().plot(experiment, **kwargs)
def _grid_plot(self, experiment, grid, **kwargs):
# xlim and ylim, xscale and yscale are the limits and scale of the
# plane onto which we are projecting. the kwargs 'scale' and 'lim'
# are the data scale and limits, respectively
scale = kwargs.pop('scale')
lim = kwargs.pop('lim')
# optimize anchor order
df = pd.DataFrame()
for c in self.channels:
vmin = lim[c][0]
vmax = lim[c][1]
c_scaled = pd.Series(data = scale[c].norm(vmin = vmin, vmax = vmax)(grid.data[c].values),
index = grid.data[c].index,
name = c)
c_scaled[(grid.data[c] < vmin) | (grid.data[c] > vmax)] = np.nan
df[c] = c_scaled
df.dropna(axis = 0, how = 'any', inplace = True)
m = len(df.columns)
s = np.array([(np.cos(t), np.sin(t))
for t in [2.0 * np.pi * (i / float(m))
for i in range(m)]])
dotmat = np.dot(df.T.values, df.values)
sim = dotmat / np.matmul(np.sqrt(np.diag(dotmat))[:, np.newaxis],
np.sqrt(np.diag(dotmat))[np.newaxis, :])
def similarity_metric(loc, sim, p):
p_loc = loc[p]
p_sim = sim[p]
dist_array = scipy.spatial.distance.pdist(p_loc)
dist_matrix = scipy.spatial.distance.squareform(dist_array)
return -1.0 * np.sum(dist_matrix * p_sim)
# for a modest number of anchors, just look permutations
# no need for anything fancier.
best_p = None
best_score = -np.inf
for p in _get_necklaces(np.arange(m)):
score = similarity_metric(s, sim, p)
if score > best_score:
best_p = p
best_score = score
kwargs.setdefault('alpha', 0.25)
kwargs.setdefault('s', 2)
kwargs.setdefault('marker', 'o')
kwargs.setdefault('antialiased', True)
# memo to track if we've put annotations on an axes yet
ax_annotations = {}
grid.map(_radviz_plot,
*self.channels,
ax_annotations = ax_annotations,
scale = scale,
lim = lim,
order = best_p,
**kwargs)
return {}
def _radviz_plot(*channels, ax_annotations, scale, lim, order, **kwargs):
color = kwargs.pop('color')
df = pd.DataFrame()
for c in channels:
vmin = lim[c.name][0]
vmax = lim[c.name][1]
c_scaled = pd.Series(data = scale[c.name].norm(vmin = vmin, vmax = vmax)(c.values),
index = c.index,
name = c.name)
c_scaled[(c < vmin) | (c > vmax)] = np.nan
df[c.name] = c_scaled
df.dropna(axis = 0, how = 'any', inplace = True)
# reorder anchors
df = df[df.columns[order]]
# adapted from pandas.plotting._misc
m = len(df.columns)
s = np.array([(np.cos(t), np.sin(t))
for t in [2.0 * np.pi * (i / float(m))
for i in range(m)]])
to_plot = [[], []]
for i in range(len(df)):
row = df.iloc[i].values
row_ = np.repeat(np.expand_dims(row, axis = 1), 2, axis = 1)
y = (s * row_).sum(axis = 0) / row.sum()
to_plot[0].append(y[0])
to_plot[1].append(y[1])
ax = plt.gca()
ax.scatter(to_plot[0], to_plot[1], color = color, **kwargs)
# have we already annotated these axes?
if ax in ax_annotations:
return
ax_annotations[ax] = True
ax.set_axis_off()
for xy, name in zip(s, df.columns):
ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))
if xy[0] < 0.0 and xy[1] < 0.0:
ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
ha='right', va='top', size='small')
elif xy[0] < 0.0 and xy[1] >= 0.0:
ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
ha='right', va='bottom', size='small')
elif xy[0] >= 0.0 and xy[1] < 0.0:
ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
ha='left', va='top', size='small')
elif xy[0] >= 0.0 and xy[1] >= 0.0:
ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
ha='left', va='bottom', size='small')
ax.axis('scaled')
def _get_necklaces(L):
import itertools as it
B = it.combinations(L,2)
swaplist = [e for e in B]
unique_necklaces = []
unique_necklaces.append(L)
for pair in swaplist:
necklace = list(L)
e1 = pair[0]
e2 = pair[1]
indexe1 = np.where(L == e1)[0][0]
indexe2 = np.where(L == e2)[0][0]
#swap
necklace[indexe1],necklace[indexe2] = necklace[indexe2], necklace[indexe1]
unique_necklaces.append(necklace)
return unique_necklaces
util.expand_class_attributes(RadvizView)
util.expand_method_parameters(RadvizView, RadvizView.plot)