#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This file is part of curveball.
# https://github.com/yoavram/curveball
# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT-license
# Copyright (c) 2015, Yoav Ram <yoav@yoavram.com>
from __future__ import division
from builtins import range
from past.utils import old_div
import copy
import numpy as np
import scipy.stats
import matplotlib as mpl
import matplotlib.pyplot as plt
try:
from pandas.plotting import lag_plot # py3.6
except ImportError:
from pandas.tools.plotting import lag_plot # py3.5
import seaborn as sns
sns.set_style("ticks")
from matplotlib.patches import RegularPolygon
from string import ascii_uppercase
def plot_wells(df, x='Time', y='OD', plot_func=plt.plot, output_filename=None):
    """Plot a grid of plots, one for each well in the plate.

    The facetting is done by the ``Row`` and ``Col`` columns of `df`.
    The colors are given by the ``Color`` column,
    the labels of the colors are given by the ``Strain`` column.
    If ``Strain`` is missing then the coloring is done by the ``Well`` column
    using the default seaborn palette.

    Parameters
    ----------
    df : pandas.DataFrame
        growth curve data, see :py:mod:`curveball.ioutils` for a detailed definition.
    x : str, optional
        name of column for x-axis, defaults to ``Time``.
    y : str, optional
        name of column for y-axis, defaults to ``OD``.
    plot_func : func, optional
        function to use for plotting, defaults to :py:func:`matplotlib.pyplot.plot`
    output_filename : str, optional
        filename to save the resulting figure; if not given, figure is not saved.

    Returns
    -------
    seaborn.FacetGrid
        figure object.
    """
    if 'Strain' in df:
        hue = 'Strain'
        hue_order = df.Strain.unique()
        if 'Color' in df:
            palette = df.Color.unique()
            # Swap pure white for black (presumably so the curves stay visible).
            # Only done on the array built from the ``Color`` column: applying
            # the same masking to the default seaborn palette (a list) would
            # index it with ``False`` and overwrite its first color.
            palette[palette == '#ffffff'] = '#000000'
        else:
            palette = sns.color_palette()
    else:
        hue = 'Well'
        palette = sns.color_palette()
        # One hue level per distinct well, not one per data row.
        hue_order = df.Well.unique()
    height = len(df.Row.unique())
    width = len(df.Col.unique())
    g = sns.FacetGrid(df, hue=hue, col='Col', row='Row',
                      palette=palette, hue_order=hue_order,
                      sharex=True, sharey=True, height=1,
                      aspect=width / float(height), despine=True, margin_titles=True)
    g.map(plot_func, x, y)
    g.fig.set_figwidth(width)
    g.fig.set_figheight(height)
    plt.locator_params(nbins=4)  # 4 ticks is enough
    g.set_axis_labels('', '')  # remove facets axis labels
    g.fig.text(0.5, 0, x, size='x-large')  # xlabel
    g.fig.text(-0.01, 0.5, y, size='x-large', rotation='vertical')  # ylabel
    if output_filename:
        g.savefig(output_filename, bbox_inches='tight', pad_inches=1)
    return g
def plot_strains(data, x='Time', y='OD', plot_func=plt.plot, by=None, agg_func=np.mean, hue='Strain', color=None, output_filename=None, **kwargs):
    """Aggregate by strain and plot the results on one figure with different color for each strain.

    The data is grouped by `by` (by default ``Strain`` together with
    ``Cycle Nr.`` or ``Time``) and each group is reduced with `agg_func`.
    The colors are taken from the ``Color`` column of `data` when it exists;
    the color labels are the unique values of the `hue` column.

    Parameters
    ----------
    data : pandas.DataFrame
        growth curve data, see :py:mod:`curveball.ioutils` for a detailed definition.
    x : str, optional
        name of column for x-axis, defaults to ``Time``.
    y : str, optional
        name of column for y-axis, defaults to ``OD``.
    plot_func : func, optional
        function to use for plotting, defaults to :py:func:`matplotlib.pyplot.plot`
    by : tuple of str, optional
        used for grouping the data, defaults to ``('Strain', 'Cycle Nr.')`` or ``('Strain', 'Time')``, whichever is available
        (``Cycle Nr.`` is preferred when both exist).
    agg_func : func, optional
        function to use for aggregating the data, defaults to :py:func:`numpy.mean`.
    hue : str, optional
        name of the column that determines the color of each curve, defaults to ``Strain``.
    color : seaborn color palette
        a seaborn color palette to use if there is no ``Color`` column; if not given, using the default palette.
    output_filename : str, optional
        filename to save the resulting figure; if not given, figure is not saved.
    **kwargs
        accepted but not used by this function.

    Returns
    -------
    seaborn.FacetGrid
        figure object.

    Raises
    ------
    ValueError
        raised if `by` isn't set and `data` doesn't contain ``Strain`` and either ``Time`` or ``Cycle Nr.``.
    """
    if 'Color' not in data:
        palette = color or sns.color_palette()
    else:
        palette = data.Color.unique()
        # pure white entries are replaced by black
        palette[palette == '#ffffff'] = '#000000'

    if by is None:
        # first available time-like column, ``Cycle Nr.`` taking precedence
        time_columns = [name for name in ('Cycle Nr.', 'Time') if name in data]
        if 'Strain' not in data or not time_columns:
            raise ValueError("If by is not set then data must have column Strain and either Time or Cycle Nr.")
        by = ['Strain', time_columns[0]]

    aggregated = data.groupby(by=by).aggregate(agg_func).reset_index()
    grid = sns.FacetGrid(
        aggregated,
        hue=hue,
        hue_order=data[hue].unique(),
        palette=palette,
        height=5,
        aspect=1.5,
    )
    grid.map(plot_func, x, y)
    grid.add_legend()
    if output_filename:
        grid.savefig(output_filename, bbox_inches='tight', pad_inches=1)
    return grid
def tsplot(data, x='Time', y='OD', ci_level=95, ax=None, color=None, output_filename=None, **kwargs):
    """Time series plot of the data by strain (if applicable) or well.

    Lines are grouped by the ``Strain`` column when `data` has one and by the
    ``Well`` column otherwise. The drawing is delegated to
    :py:func:`seaborn.lineplot`, which is asked for an error band
    (``err_style='band'``) at the `ci_level` confidence level.
    The colors are the unique values of the ``Color`` column when it exists;
    otherwise `color` or the default seaborn palette is used.

    Parameters
    ----------
    data : pandas.DataFrame
        growth curve data, see :py:mod:`curveball.ioutils` for a detailed definition.
    x : str, optional
        name of column for x-axis, defaults to ``Time``.
    y : str, optional
        name of column for y-axis, defaults to ``OD``.
    ci_level : int, optional
        confidence interval width in percent (0-100), defaults to 95.
    ax : matplotlib.axes.Axes, optional
        plot into this axes, if not given create a new figure.
    color : seaborn color palette
        a seaborn color palette to use if there is no ``Color`` column; if not given, using the default palette.
    output_filename : str, optional
        filename to save the resulting figure; if not given, figure is not saved.
    **kwargs
        accepted but not used by this function.

    Returns
    -------
    matplotlib.axes.Axes
        axes object
    """
    condition = 'Strain' if 'Strain' in data else 'Well'

    if 'Color' not in data:
        palette = color or sns.color_palette()
    else:
        palette = data['Color'].unique()
        # pure white entries are replaced by black
        palette[palette == '#ffffff'] = '#000000'

    axes = sns.lineplot(
        x=x,
        y=y,
        hue=condition,
        data=data,
        palette=list(palette),
        ci=ci_level,
        err_style='band',
        ax=ax,
    )
    sns.despine()
    if output_filename:
        axes.figure.savefig(output_filename, bbox_inches='tight', pad_inches=1)
    return axes
def plot_plate(data, edge_color='#888888', output_filename=None):
    """Plot of the plate color mapping.

    The function will plot the color mapping in `data`:
    a grid with enough columns and rows for the ``Col`` and ``Row`` columns in `data`,
    where the color of each grid cell given by the ``Color`` column.
    Columns are labelled ``1..width`` along the top and rows are labelled with
    uppercase letters starting at ``A`` at the top.

    Parameters
    ----------
    data : pandas.DataFrame
        growth curve data, see :py:mod:`curveball.ioutils` for a detailed definition.
    edge_color : str
        color hex string for the grid edges.
    output_filename : str, optional
        filename to save the resulting figure; if not given, figure is not saved.

    Returns
    -------
    fig : matplotlib.figure.Figure
        figure object
    ax : matplotlib.axes.Axes
        the single axes the plate is drawn on.
    """
    # Keyword arguments: recent pandas versions no longer accept positional
    # ``index``/``columns``/``values`` in ``DataFrame.pivot``.
    plate = data.pivot(index='Row', columns='Col', values='Color').values
    height, width = plate.shape
    fig = plt.figure(figsize=((width + 2.0) / 3.0, (height + 2.0) / 3.0))
    ax = fig.add_axes((0.05, 0.05, 0.9, 0.9),
                      aspect='equal', frameon=False,
                      xlim=(-0.05, width + 0.05),
                      ylim=(-0.05, height + 0.05))
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_formatter(plt.NullFormatter())
        axis.set_major_locator(plt.NullLocator())

    # Create the grid of squares: a 4-vertex regular polygon rotated by pi/4
    # with radius sqrt(2)/2 is a unit square centered on (i + 0.5, j + 0.5).
    for i in range(width):
        for j in range(height):
            square = RegularPolygon((i + 0.5, j + 0.5),
                                    numVertices=4,
                                    radius=0.5 * np.sqrt(2),
                                    orientation=np.pi / 4,
                                    ec=edge_color,
                                    # y grows upwards, so the first pivot row
                                    # is drawn at the top of the grid
                                    fc=plate[height - 1 - j, i])
            ax.add_patch(square)

    ax.set_xticks(np.arange(width) + 0.5)
    ax.set_xticklabels(np.arange(1, 1 + width))
    ax.set_yticks(np.arange(height) + 0.5)
    ax.set_yticklabels(ascii_uppercase[height-1::-1])
    ax.xaxis.tick_top()
    ax.yaxis.tick_left()
    ax.tick_params(length=0, width=0)
    if output_filename:
        fig.savefig(output_filename, bbox_inches='tight', pad_inches=1)
    return fig, ax
def plot_params_distribution(param_samples, color='k', cmap="viridis", alpha=None):
    """Plots a distribution of model parameter samples generated with :py:func:`curveball.models.sample_params`.

    A :py:class:`seaborn.PairGrid` is built from `param_samples`:
    scatter plots in the upper triangle, kernel density plots in the lower
    triangle, and histograms on the diagonal.

    Parameters
    ----------
    param_samples : pandas.DataFrame
        data frame of samples; each row is one sample, each column is one parameter.
    color : str, optional
        color of the scatter markers and the histogram faces, defaults to `k` for black.
    cmap : str, optional
        colormap passed to the density plots, defaults to ``viridis``.
    alpha : float
        transparency of plot markers, defaults to :math:`1/n^{1/4}` where *n* is number of rows in `param_samples`.

    Returns
    -------
    seaborn.Grid
        figure object
    """
    if alpha is None:
        n_rows = param_samples.shape[0]
        alpha = 1.0 / np.power(n_rows, 1.0 / 4.0)

    grid = sns.PairGrid(param_samples)
    grid.map_upper(plt.scatter, color=color, alpha=alpha)
    grid.map_lower(sns.kdeplot, shade=True, shade_lowest=False, cmap=cmap, legend=False)
    # see https://github.com/mwaskom/seaborn/pull/788
    grid.map_diag(plt.hist, facecolor=color)
    return grid
def _plot_fitted_histogram(data, rv=scipy.stats.norm, color='k', label=None, alpha=0.5, ax=None):
    """Draw a density histogram of `data` with the pdf of a fitted `rv` on top.

    This is basically `sns.distplot(fit=rv)`: `rv` is fitted to `data`,
    the histogram uses at most 100 bins, the fitted pdf is evaluated at the
    bin edges, and the mean and standard deviation of the fitted distribution
    are annotated above the middle bin edge. `label` is accepted but not used.

    Returns the figure and the axes that were drawn on; a new figure is
    created when `ax` is not given.

    TODO: `low,high = np.percentile(x, 2.5), np.percentile(x, 97.5)`
    """
    if ax is not None:
        fig = ax.figure
    else:
        fig, ax = plt.subplots(1, 1)

    fitted = rv(*rv.fit(data))
    counts, edges, _ = ax.hist(
        data,
        bins=min(100, len(data)),
        color=color,
        alpha=alpha,
        density=True,
    )
    ax.plot(edges, fitted.pdf(edges), color='k', lw=2)

    summary = r'$\mu={:.2g}, \sigma={:.2g}$'.format(fitted.mean(), fitted.std())
    anchor = (edges[len(edges)//2], np.max(counts))
    ax.annotate(
        summary,
        xy=anchor,
        xycoords="data",
        horizontalalignment='center',
        fontsize=plt.rcParams['axes.labelsize'],
    )
    return fig, ax
def plot_model_residuals(model_fit, rv=scipy.stats.norm, color='k'):
    """Plot of the residuals of a model fit.

    The figure has two panels and is twice as wide as the default figure size.
    The left panel is drawn by ``model_fit.plot_residuals``;
    the right panel shows the histogram of ``model_fit.residual`` with a fitted distribution curve.

    Parameters
    ----------
    model_fit : lmfit.ModelResult
        the result of a model fitting procedure.
    rv : scipy.stats.rv_continuous, optional
        :py:class:`scipy.stats.rv_continuous` random variable whose probability density function (pdf)
        will be fitted to the histogram. Defaults to the normal distribution (`scipy.stats.norm`).
    color : str, optional
        color string for the plot, defaults to `k` for black.

    Returns
    -------
    fig : matplotlib.figure.Figure
        figure object
    ax : numpy.ndarray
        array of axis objects.
    """
    base_width, base_height = plt.rcParams['figure.figsize']
    fig, axes = plt.subplots(1, 2, figsize=(base_width * 2, base_height))
    resid_ax, hist_ax = axes

    # Left panel. Only data_kws is passed: per the original author, passing
    # fit_kws={'color': color} as well triggers a bug in lmfit.
    model_fit.plot_residuals(ax=resid_ax, data_kws={'color': color})
    resid_ax.set_xlabel('Time (hr)')
    resid_ax.set_ylabel('Residuals')
    resid_ax.legend().set_visible(False)
    resid_ax.set_title('')

    # Right panel.
    _plot_fitted_histogram(model_fit.residual, rv=rv, color=color, ax=hist_ax)
    hist_ax.set(xlabel='Residuals', ylabel='Frequency')

    fig.tight_layout()
    sns.despine()
    return fig, axes
def plot_residuals(df, time='Time', value='OD', resid_func=lambda x: x - x.mean(), rv=scipy.stats.norm,
                   color='k', ax=None):
    """Plot of the residuals in the data.

    The function will plot the residuals - by default the difference between data and average at each time point.
    The left panel shows the residuals over time.
    The middle panel shows the histogram of the residuals with a fitted distribution (defaults to Gaussian).
    The right panel shows the regression between the standard deviation at time `t+1` and `t` to identify autocorrelation.

    Parameters
    ----------
    df : pandas.DataFrame
        a data frame with columns ``Time`` and ``OD``.
    time : str, optional
        name of column over which to group and plot the residuals. Defaults to ``Time``.
    value : str, optional
        name of column in `df` of the value on which to compute the residuals. Defaults to ``OD``.
    resid_func : function, optional
        function to calculate residuals. Defaults to ``x - x.mean()``.
    rv : scipy.stats.rv_continuous, optional
        :py:class:`scipy.stats.rv_continuous` random variable whose probability density function (pdf)
        will be fitted to the histogram. Defaults the normal distribution (:py:class:`scipy.stats.norm`).
    color : str, optional
        color string for the plot, defaults to `k` for black.
    ax : sequence of matplotlib.axes.Axes, optional
        three axes to draw the panels on (left, middle, right); if not given,
        a new figure three times as wide as the default figure size is created.

    Returns
    -------
    fig : matplotlib.figure.Figure
        figure object
    ax : numpy.ndarray
        array of axis objects.

    Raises
    ------
    ValueError
        raised if `ax` is given but does not contain exactly three axes.
    """
    if ax is None:
        w, h = plt.rcParams['figure.figsize']
        fig, ax = plt.subplots(1, 3, figsize=(w * 3, h))
    else:
        # Honor the caller's axes instead of silently replacing them.
        ax = np.asarray(ax, dtype=object).ravel()
        if ax.size != 3:
            raise ValueError("ax must contain exactly three axes")
        fig = ax[0].figure

    # Left panel: residuals of every observation within its time group.
    residuals = df.groupby(time)[value].transform(resid_func).values
    ax[0].plot(df[time], residuals, ls='', marker='o', color=color)
    ax[0].set(xlabel=time, ylabel='Residuals')

    # Middle panel: histogram of the residuals with the fitted distribution.
    _plot_fitted_histogram(residuals, rv=rv, color=color, ax=ax[1])
    ax[1].set(xlabel='Residuals', ylabel='Frequency')

    # Right panel: standard deviation at each time point against the previous one.
    sigmas = df.groupby(time)[value].std()
    linreg = scipy.stats.linregress(sigmas.values[:-1], sigmas.values[1:])
    eq = r'$\sigma_{{t+1}} = {:.2g} + {:.2g} \sigma_{{t}}$'.format(linreg.intercept, linreg.slope)
    sigma_range = np.linspace(sigmas.min(), sigmas.max())
    ax[2].plot(sigma_range, sigma_range, color='k', ls='--', label=r'$\sigma_{t+1}=\sigma_{t}$')
    ax[2].plot(sigma_range, linreg.intercept + linreg.slope * sigma_range, color=color, label=eq)
    lag_plot(sigmas, c='k', ax=ax[2])
    # lag_plot sets its own axis labels; replace them with the sigma notation
    ax[2].set(xlabel=r'$\sigma_{t}$', ylabel=r'$\sigma_{t+1}$')
    ax[2].legend(loc='upper left')

    fig.tight_layout()
    # Despine the figure that was drawn on, which need not be the current one
    # when the axes were supplied by the caller.
    sns.despine(fig=fig)
    return fig, ax
def plot_sample_fit(model_fit, param_samples, fit_kws=None, data_kws=None, sample_kws=None):
    """Plot of sampled curve fits.

    The function will plot the main model fit and the sampled curve fits based on a table of sample parameters.
    For every row of `param_samples`, a copy of the fitted parameters is made,
    the varying parameters are set to the sampled values, and the model is
    evaluated on an evenly spaced time grid from 0 to the maximal fitted time.

    Parameters
    ----------
    model_fit : lmfit.ModelResult
        the result of a model fitting procedure.
    param_samples : pandas.DataFrame
        data frame of samples; each row is one sample, each column is one parameter.
    fit_kws, data_kws, sample_kws : dict
        dictionaries of plot directives for the fit, data, and sampled fit curves.
        They override the defaults: ``linewidth=5`` for the fit, ``marker='.'`` for the data,
        and dashed gray lines with transparency :math:`1/\\sqrt{n}` for the *n* sampled curves.

    Returns
    -------
    fig : matplotlib.figure.Figure
        figure object
    ax : matplotlib.axes.Axes
        the axes returned by ``model_fit.plot_fit``.
    """
    t = np.linspace(0, model_fit.userkws['t'].max())
    nsamples = param_samples.shape[0]

    _fit_kws = dict(linewidth=5)
    if fit_kws:
        _fit_kws.update(fit_kws)
    _data_kws = dict(marker='.')
    if data_kws:
        _data_kws.update(data_kws)
    _sample_kws = dict(linestyle='--', color='gray', alpha=1/np.sqrt(nsamples))
    if sample_kws:
        _sample_kws.update(sample_kws)

    ax = model_fit.plot_fit(init_kws={'ls': ''}, fit_kws=_fit_kws, data_kws=_data_kws)
    for i in range(nsamples):
        sample = param_samples.iloc[i, :]
        params = model_fit.params.copy()
        for k, v in params.items():
            if v.vary:
                v.set(value=sample[k])
        # Draw on the axes of the fit itself rather than on whatever axes
        # pyplot currently considers active.
        ax.plot(t, model_fit.model.eval(t=t, params=params), **_sample_kws)
    ax.legend().set_visible(False)
    ax.set(ylabel='OD', xlabel='Time', title='')
    sns.despine(ax=ax)
    return ax.figure, ax