Source code for curveball.competitions

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of curveball.
# https://github.com/yoavram/curveball

# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT-license
# Copyright (c) 2015, Yoav Ram <yoav@yoavram.com>
from __future__ import division
from builtins import range
import warnings
import copy
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import curve_fit
import pandas as pd
import lmfit
import curveball
import seaborn as sns
sns.set_style("ticks")

def _alfa(t, q0, v):
if np.isinf(q0) or np.isinf(v):
return 1.0
return q0 / (q0 + np.exp(-v * t))

[docs]def double_baranyi_roberts_ode0(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model [1]_. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big(1 - \Big(\frac{\sum_{j}{N_j}}{K_i}\Big)^{\nu_i}\Big)

- :math:N_i: population size of strain *i*.
- :math:r_i: initial per capita growth rate of strain *i*
- :math:K_i: maximum population size of strain *i*
- :math:\nu_i: curvature of the logsitic term of strain *i*
- :math:\alpha_i(t)= \frac{q_{0,i}}{q_{0,i} + e^{-v_i t}}
- :math:q_{0,i}: initial adjustment of strain *i* to current environment
- :math:v_i: adjustment rate of strain *i*

Parameters
----------
y : float, float
population size
t : float
time, usually in hours
r : float, float
initial per capita growth rate
K : float, float
maximum population size (:math:K>0)
nu : float, float
curvature of the logsitic term (:math:\nu>0)
q0 : float, float
initial adjustment to current environment (:math:0<q_0<1)
v : float, float
adjustment rate (:math:v>0)

Returns
-------
float, float
population growth rate.

References
----------
.. [1] Baranyi, J., Roberts, T. A., 1994. A dynamic approach to predicting bacterial growth in food <www.ncbi.nlm.nih.gov/pubmed/7873331>_. Int. J. Food Microbiol.
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
dydt = alfa[0] * r[0] * y[0] * (1 - ((y[0] + y[1]) / K[0])**nu[0]), alfa[1] * r[1] * y[1] * (1 - ((y[0] + y[1]) / K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode1(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big(1 - \Big(\sum_{j}{\frac{N_j}{K_j}}\Big)^{\nu_i}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
dydt = alfa[0] * r[0] * y[0] * (1 - (y[0] / K[0] + y[1] / K[1])**nu[0]), alfa[1] * r[1] * y[1] * (1 - (y[0] / K[0] + y[1]/K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode2(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big(1 - \sum_{j}{\Big(\frac{N_j}{K_j}\Big)^{\nu_j}}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
dydt = alfa[0] * r[0] * y[0] * (1 - (y[0] / K[0])**nu[0] - (y[1] / K[1])**nu[1]), alfa[1] * r[1] * y[1] * (1 - (y[0] / K[0])**nu[0] - (y[1] / K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode3(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big( 1 - \Big(\frac{\sum_j{N_j}}{\bar{K}}\Big)^{\nu_i} \Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
Kmean = (K[0] * y[0] + K[1] * y[1]) / (y[0] + y[1])
dydt = alfa[0] * r[0] * y[0] * (1 - ((y[0] + y[1]) / Kmean)**nu[0]), alfa[1] * r[1] * y[1] * (1 - ((y[0] + y[1]) / Kmean)**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode4(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
dydt = alfa[0] * r[0] * y[0], alfa[1] * r[1] * y[1]
return dydt

[docs]def double_baranyi_roberts_ode5(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big( 1 - \Big(\frac{N_i}{K_i}\Big)^{\nu_i}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
dydt = alfa[0] * r[0] * y[0] * (1 - (y[0]/K[0])**nu[0]), alfa[1] * r[1] * y[1] * (1 - (y[1]/K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode6(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \Big(1 - \Big(\frac{\sum_j{N_j}}{K_i}\Big)^{\bar{\nu}}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
numean = (nu[0]*y[0] + nu[1]*y[1]) / (y[0] + y[1])
dydt = alfa[0] * r[0] * y[0] * (1 - ((y[0]+y[1])/K[0])**numean), alfa[1] * r[1] * y[1] * (1 - ((y[0]+y[1])/K[1])**numean)
return dydt

[docs]def double_baranyi_roberts_ode7(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_1}{dt} = 0 \;\;\; \frac{dN_2}{dt} = r_2 \alpha_2(t) N_2 \Big(1 - \Big(\frac{N_2}{K_2}\Big)^{\nu_1}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
numean = (nu[0]*y[0] + nu[1]*y[1]) / (y[0] + y[1])
dydt = 0, alfa[1] * r[1] * y[1] * (1 - (y[1]/K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_ode8(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model. The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i N_i \Big(1 - \Big(\frac{\sum_{j}{N_j}}{K_i}\Big)^{\nu_i}\Big)

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
dydt = r[0] * y[0] * (1 - ((y[0] + y[1]) / K[0])**nu[0]), r[1] * y[1] * (1 - ((y[0] + y[1]) / K[1])**nu[1])
return dydt

[docs]def double_baranyi_roberts_gimenez_delgado_ode(y, t, r, K, nu, q0, v):
r"""A two species Baranyi-Roberts model with competition model inspired by Gimenez and Delgado (2004).
The function calculates the population growth rate at given time points.

.. math::

\frac{dN_i}{dt} = r_i \alpha_i(t) N_i \prod_j{\Big(1 - \Big(\frac{N_j}{K_j}\Big)^{\nu_j}\Big)}

See also
--------
curveball.competitions.double_baranyi_roberts_ode0
"""
alfa = _alfa(t, q0[0], v[0]), _alfa(t, q0[1], v[1])
prod = (1 - (y[0]/K[0])**(nu[0])) * (1 - (y[1]/K[1])**(nu[1]))
dydt = alfa[0] * r[0] * y[0] * prod, alfa[1] * r[1] * y[1] * prod
return dydt

[docs]def compete(m1, m2, p0=(0.5, 0.5), y0=None, t=None, hours=24, num_of_points=100,
nsamples=1, lag_phase=True, ode=double_baranyi_roberts_ode1,
params1=None, params2=None, ci=95, colors=None, ax=None, PLOT=False,
sampler='covar', df1=None, df2=None):
"""Simulate competitions between two strains using growth parameters estimated
by fitting growth models to growth curves data.

Integrates a 2-dimensional ODE system given by ode
with parameters extracted from :py:class:lmfit.model.ModelResult instances m1 and m2.
This implementation includes plotting (if required by PLOT);
resampling from the distribution of model parameters (when nsamples > 1);
changing the ODE system (by providing a different function in ode);
and more.

The function competes two strains/species;
therefore it expects two :py:class:lmfit.model.ModelResult objects,
two initial values in y0, etc.

Parameters
----------
m1, m2 : lmfit.model.ModelResult
model fitting results of growth models defined in :py:mod:curveball.models.
p0, y0 : tuple, optional
p0 is the initial relative frequencies; y0 is the initial population sizes.
if y0 is given than y0[0] and y0[0] are the initial population size for m1 and m2, respectively.
if y0 is not given, then it will be set to the average estimated y0 in m1 and m2 multiplied by p0.
t : numpy.ndarray or None, optional
array of time points in which to calculate the population sizes.
hours : int, optional
if t is not given, determines how many hours should the competition proceed, defaults to 24.
num_of_points : int, optional
if t is not given, determines the number of time points to use, defaults to 100.
nsamples : int, optional
how many replicates of the competition should be simulated;
if nsamples = 1, only one competition is simulated with the estimated parameters;
otherwise nsamples competitions are simulated with parameters drawn from a parameter distribution
determined by sampler. Defaults to 1.
lag_phase : bool, optional
if True, use lag phase as given by m1 and m2. Otherwise, override the lag phase parameters to prevent a lag phase. Defaults to :const:True.
ode : func, optional
an ordinary differential systems system defined by a function that accepts y, t, and additional arguments, and returns the derivate of y with respect to t. Defaults to :py:func:.double_baranyi_roberts_ode0.
params1, params2 : dict, optional
dictionaries of model parameter values; if given, overrides values from m1 and m2.
ci : float, optional
confidence interval size, in (0, 100), only applicable when PLOT is :const:True, defaults to 95%.
colors : sequence of str, optional
if PLOT is :const:True, this sets the colors of the drawn lines. colors[0] will be used for m1; colors[1] for m2. If not provided, defaults to the current pallete.
ax : matplotlib.axes.Axes, optional
if PLOT is :const:True, an axes to plot into; if not provided, a new one is created.
PLOT : bool, optional
if :const:True, the function will plot the curves of *y* as a function of *t*. Defaults to :const:False.
sampler : str, optional
if covar, the parameters will be sampled using the covariance matrix (:py:func:curveball.models.sample_params);
if bootstrap the parameters will be sampled by resampling the growth curves (:py:func:curveball.models.bootstrap_params).
df1, df2 : pandas.DataFrame, optional
the data frames used to fit m1 and m2, only used when sampler is bootstrap.

Returns
-------
t : numpy.ndarray
1d (or 2d, if nsamples>1) array of time points, in hours.
y: numpy.ndarray
2d (or 3d, if nsamples>1) array of strain frequencies.
First axis is time, second axis is strain, third axis (if applicable) is sample.
fig : matplotlib.figure.Figure
figure object.
ax : numpy.ndarray
array of :py:class:matplotlib.axes.Axes objects.

Raises
------
TypeError
if m1 or m2 are not :py:class:lmfit.model.ModelResult.
AssertionError
if an intermediate calculation produced an invalid result (not guaranteed).

Examples
--------
>>> import pandas as pd
>>> import curveball
>>> plate = pd.read_csv('plate_templates/G-RG-R.csv')
>>> df = curveball.ioutils.read_tecan_xlsx('data/Tecan_280715.xlsx', label='OD', plate=plate)
>>> green = curveball.models.fit_model(df[df.Strain == 'G'], PLOT=False, PRINT=False)[0]
>>> red = curveball.models.fit_model(df[df.Strain == 'R'], PLOT=False, PRINT=False)[0]
>>> t, y = curveball.competitions.compete(green, red, PLOT=False)

Notes
-----
To debug, uncomment lines to return the infodict that is returned from :py:func:scipy.integrate.odeint is run with full_output=1.
"""
if not isinstance(m1, lmfit.model.ModelResult):
raise TypeError("m1 must be %s, instead it is %s", lmfit.model.ModelResult, type(m1))
if not isinstance(m2, lmfit.model.ModelResult):
raise TypeError("m2 must be %s, instead it is %s", lmfit.model.ModelResult, type(m2))

if t is None:
t = np.linspace(0, hours, num_of_points)

if nsamples > 1:
# draw random params
sampler = sampler.lower()
if sampler == 'covar':
m1_samples = curveball.models.sample_params(m1, nsamples, params=params1)
m2_samples = curveball.models.sample_params(m2, nsamples, params=params2)
elif sampler == 'bootstrap':
if params1 or params2:
warnings.warn("Bootstrap sampling doesn't support params1 and params2 arguments")
if df1 is None or df2 is None:
raise ValueError("Bootstrap sampling requires kwargs df1 and df2")
m1_fixed = {pname for pname,p in m1.params.items() if not p.vary}
m2_fixed = {pname for pname,p in m2.params.items() if not p.vary}
# FIXME bootstrap_params has changed
m1_samples = curveball.models.bootstrap_params(df1, m1, nsamples,
fit_kws={'param_fix': m1_fixed})
m2_samples = curveball.models.bootstrap_params(df2, m2, nsamples,
fit_kws={'param_fix': m2_fixed})
else:
raise ValueError("Unknow sampler method: {0}".format(sampler))
min_nsamples = min(len(m1_samples), len(m2_samples))
if nsamples > min_nsamples:
warnings.warn("{0} resamples lost".format(nsamples - min_nsamples))
nsamples = min_nsamples
else:
nsamples = 1
# override model result params with arguments params1 and params2
if params1:
_params = copy.copy(m1.best_values)
_params.update(params1)
params1 = _params
else:
params1 = m1.best_values
if params2:
_params = copy.copy(m2.best_values)
_params.update(params2)
params2 = _params
else:
params2 = m2.best_values
# param samples contain the model fit estimated params
m1_samples = pd.DataFrame([params1])
m2_samples = pd.DataFrame([params2])
assert len(m1_samples) == len(m2_samples)

y = np.empty((len(t), 2, nsamples))
#infodict = [None]*nsamples # DEBUG

# simulate the ode for each param sample
for i in range(nsamples):
if y0 is None:
p0 = p0[0] / (p0[0] + p0[1]), p0[1] / (p0[0] + p0[1])
y0 = m1_samples.iloc[i]['y0'] * p0[0], m2_samples.iloc[i]['y0'] * p0[1]
r = m1_samples.iloc[i]['r'], m2_samples.iloc[i]['r']
K = m1_samples.iloc[i]['K'], m2_samples.iloc[i]['K']
nu = m1_samples.iloc[i].get('nu', 1.0), m2_samples.iloc[i].get('nu', 1.0)
if lag_phase:
q0 = m1_samples.iloc[i].get('q0', np.inf), m2_samples.iloc[i].get('q0', np.inf)
v = m1_samples.iloc[i].get('v', r[0]), m2_samples.iloc[i].get('v', r[1])
else:
q0 = np.inf, np.inf
v = np.inf, np.inf

args = (r, K, nu, q0, v)
y[:,:,i] = odeint(ode, y0=y0, t=t, args=args)

# DEBUG
#_y_,info = odeint(double_baranyi_roberts_ode0, y0, t, args=args, full_output=1)
#info['args'] = (y0,) + args
#infodict[i] = info
#if info['message'] == 'Integration successful.':
#   y[:,:,i] = _y_

if PLOT:
if ax is None:
fig,ax = plt.subplots(1, 1)
else:
fig = ax.get_figure()

df = pd.DataFrame()
for i in range(y.shape[1]):
_df = pd.DataFrame(y[:,i,:])
_df['Time'] = t
_df = pd.melt(_df, id_vars='Time', var_name='Replicate', value_name='y')
_df['Strain'] = i
df = pd.concat((df, _df))

if colors is not None:
colors = {i:c for i,c in enumerate(colors)}
sns.lineplot(data=df, x='Time', hue='Strain', y='y',
ci=ci, palette=colors, ax=ax)
ax.set_xlabel('Time (hour)')
ax.set_ylabel('OD')
sns.despine()
return t,y,fig,ax
return t,y#,infodict

[docs]def selection_coefs_ts(t, y, ax=None, PLOT=False):
r"""Calculate selection coefficient according to the following formula[2]_,
where :math:A(t), B(t) are population densities of assay strain *A* and reference strain *B* at time *t*:

.. math::

s = \frac{d}{dt} \log{\frac{A(t)}{B(t)}}

Parameters
----------
t : numpy.ndarray
array of time points, as produced by :py:func:compete
y : numpy.ndarray
array of population densities, as produced by :py:func:compete, where the first axis is time and the second axis is strain.
ax : matplotlib.axes.Axes, optional
if PLOT is :const:True, an axes to plot into; if not provided, a new one is created.
PLOT : bool, optional
if :const:True, the function will plot the curve of *s* as a function of *t*.

Returns
-------
svals : numpy.ndarray
the selection coefficients of the assay strain relative to the reference strain over time.
fig : matplotlib.figure.Figure
figure object.
ax : numpy.ndarray of matplotlib.axes.Axes
array of axes objects.

Notes
-----
This formula assumes that the frequencies of the strains follow a logistic curve.
Lag phases, interactions, etc. may cause this formula to become irrelevant.

References
----------
.. [12] Chevin, L-M. 2011. On Measuring Selection in Experimental Evolution <http://dx.doi.org/10.1098/rsbl.2010.0580>_. Biology Letters.
"""
dt = np.ediff1d(t, to_end=1)
dt[-1] = dt[-2]
svals = np.gradient(np.log(y[:,0] / y[:,1]), dt)
svals[np.isinf(svals)] = svals[np.isfinite(svals)].max()

if PLOT:
if ax is None:
fig,ax = plt.subplots(1,1)
else:
fig = ax.get_figure()
ax.plot(t, svals)
ax.set_ylabel('Selection coefficient')
ax.set_xlabel('Time (hour)')
sns.despine()
return svals, fig, ax

return svals

[docs]def fitness_LTEE(y, ref_strain=0, assay_strain=1, t0=0, t1=-1, ci=0):
r"""Calculate relative fitness according to the definition used in the *Long Term Evolutionary Experiment* (LTEE) [3]_,
where :math:A(t), B(t) are population densities of assay strain *A* and reference strain *B* at time *t*:

.. math::

\omega = \frac{\log{(A(t) / A(0))}}{\log{(B(t) / B(0))}}

If ci > 0, treats the third axis of y as replicates (from a parameteric boostrap) to calculate the confidence interval on the fitness.

Parameters
----------
y : numpy.ndarray
array of population densities, as produced by :py:func:compete, where the first axis is time and the second axis is strain. A third axis is also applicable if ci>0.
ref_strain : int, optional
the index of the reference strain within y. This strain's fitness is set to 1 by definition. Defaults to 0 (first).
assay_strain : int, optional
the index of the assay strain within y. The result will be the fitness of this strain relative to the fitness of the reference strain. Defaults to 1 (second).
t0 : int, optional
the index of the time point from which to start the calculation of the relative fitness, defaults to 0 (first).
t1 : int, optional
the index of the time point at which to end the calculation of the relative fitness, defaults to -1 (last).
ci : float between 0 and 1, optional
if not zero, a confidence interval will be calculated using the third axis of y as replicates.

Returns
-------
w : float
the fitness of the assay strain relative to the reference strain.
low : float
if ci > 0 and y.ndim = 3, this is the low limit of the confidence interval of the fitness
high : float
if ci > 0 and y.ndim = 3, this is the higher limit of the confidence interval of the fitness

Raises
------
ValueError
if confidence interval is requested (ci > 0) but there are no replicates (y.ndim != 3).

Notes
-----
The result may depend on the choice of t0 and t1 as well as the strain designations (ref_strain and assay_strain).

References
----------
.. [3] Wiser, M. J. & Lenski, R. E. 2015 A Comparison of Methods to Measure Fitness in Escherichia coli <http://dx.plos.org/10.1371/journal.pone.0126210>_. PLoS One.
"""
if ci == 0:
y = y.reshape(y.shape + (1,))
elif y.ndim != 3:
raise ValueError()
w = np.zeros(y.shape[2])
for i in range(y.shape[2]):
At0, Bt0 = y[t0, assay_strain, i], y[t0, ref_strain, i]
At1, Bt1 = y[t1, assay_strain, i], y[t1, ref_strain, i]
w[i] = np.log(At1 / At0) / np.log(Bt1 / Bt0)

if ci == 0:
return w.mean()
else:
margin = (1 - ci) * 50
return w.mean(), np.percentile(w, margin), np.percentile(w, ci * 100 + margin)

[docs]def baranyi_roberts_gd(y, t, *args):
r"""
Fujikawa, Hiroshi, and Mohammad Z Sakha. 2014. Prediction of Microbial Growth in Mixed Culture with a Competition Model. Biocontrol Science 19 (2): 89-92.
"""
y1, y2 = y
K, r, nu, q0, v, a = args
r1, r2 = r
K1, K2 = K
nu1, nu2 = nu
q01, q02 = q0
v1, v2 = v
a1, a2 = a
alfa1 = _alfa(t, q01, v1)
alfa2 = _alfa(t, q02, v2)
dy1dt = r1 * alfa1 * y1 * (1 - (y1/K1)**nu1) * (1 - (y2/K2)**(nu2*a2))
dy2dt = r2 * alfa2 * y2 * (1 - (y1/K1)**(nu1*a1)) * (1 - (y2/K2)**nu2)
return [dy1dt, dy2dt]

[docs]def baranyi_roberts_lv(y, t, *args):
r"""
Fujikawa, Hiroshi, and Mohammad Z Sakha. 2014. Prediction of Microbial Growth in Mixed Culture with a Competition Model. Biocontrol Science 19 (2): 89-92.
"""
y1, y2 = y
K, r, nu, q0, v, a = args
r1, r2 = r
K1, K2 = K
nu1, nu2 = nu
q01, q02 = q0
v1, v2 = v
a1, a2 = a
alfa1 = _alfa(t, q01, v1)
alfa2 = _alfa(t, q02, v2)
Kmax = max(K1**nu1, K2**nu2)
dy1dt = r1 * alfa1 * y1 * (1 - (y1/K1)**nu1) * (1 - (y1**a1 + y2**a2)/Kmax)
dy2dt = r2 * alfa2 * y2 * (1 - (y2/K2)**nu2) * (1 - (y1**a1 + y2**a2)/Kmax)
return [dy1dt, dy2dt]

def baranyi_roberts_yr2(y, t, *args):
y1, y2 = y
K, r, nu, q0, v, a = args
r1, r2 = r
K1, K2 = K
nu1, nu2 = nu
q01, q02 = q0
v1, v2 = v
a1, a2 = a
alfa1 = _alfa(t, q01, v1)
alfa2 = _alfa(t, q02, v2)
dy1dt = r1 * alfa1 * y1 * (1 - (y1/K1)**nu1 - (a2 * y2/K2)**nu2)
dy2dt = r2 * alfa2 * y2 * (1 - (a1 * y1/K1)**nu1 - (y2/K2)**nu2)
return [dy1dt, dy2dt]

def baranyi_roberts_yr(y, t, *args):
y1, y2 = y
K, r, nu, q0, v, a = args
r1, r2 = r
K1, K2 = K
nu1, nu2 = nu
q01, q02 = q0
v1, v2 = v
a1, a2 = a
alfa1 = _alfa(t, q01, v1)
alfa2 = _alfa(t, q02, v2)
dy1dt = r1 * alfa1 * y1 * (1 - (y1**nu1) / (K1**nu1) - a2 * (y2**nu2) / (K1**nu1))
dy2dt = r2 * alfa2 * y2 * (1 - a1 * (y1**nu1) / (K2**nu2) - (y2**nu2) / (K2**nu2))
return [dy1dt, dy2dt]

# obsolete - just call baranyi_roberts_yr(y, t, *args, (1, 1))
# def baranyi_roberts_yr_a1(y, t, *args):
#     return baranyi_roberts_yr(y, t, *args, (1, 1))

def fit_and_compete(m1, m2, df_mixed, y0=None, aguess=(1, 1), fixed=False,
ode=baranyi_roberts_yr, num_of_points=100, method='nelder',
value_var = 'OD', time_var = 'Time',
PLOT=False, colors=sns.color_palette('Set1', 3)):
best_values1 = m1.best_values if hasattr(m1, 'best_values') else m1
best_values2 = m2.best_values if hasattr(m2, 'best_values') else m2
K = best_values1['K'], best_values2['K']
r = best_values1['r'], best_values2['r']
nu = best_values1.get('nu',1), best_values2.get('nu',1)
q0 = best_values1.get('q0', np.inf), best_values2.get('q0', np.inf)
v = best_values1.get('v', r[0]), best_values2.get('v', r[1])
if y0 is None:
y0 = best_values1['y0']/2, best_values2['y0']/2

y_mixed = df_mixed.groupby(time_var)[value_var].mean().values
y_mixed_std = df_mixed.groupby(time_var)[value_var].std().values
t_mixed = np.unique(df_mixed[time_var])
t = np.linspace(0, t_mixed.max(), num_of_points)

if fixed:
a = aguess
a1, a2 = a
else:
def mixed_model(t, a1, a2):
y = odeint(ode, y0, t, args=(K, r, nu, q0, v, (a1, a2)))
return y.sum(axis=1)
model = lmfit.Model(mixed_model)

params = model.make_params(a1=aguess[0], a2=aguess[1])
params['a1'].set(min=1e-1, max=10)
params['a2'].set(min=1e-1, max=10)

result = model.fit(data=y_mixed, t=t_mixed, params=params, weights=1.0 / y_mixed_std,
method=method, nan_policy='propagate')

a = result.best_values['a1'], result.best_values['a2']
a1, a2 = a

y = odeint(ode, y0, t, args=(K, r, nu, q0, v, a))

if PLOT:
ysum = y.sum(axis=1)
p1 = y[:, 0] / ysum
p2 = y[:, 1] / ysum
MRSE = ((odeint(ode, y0, t_mixed, args=(K, r, nu, q0, v, a)).sum(axis=1) - y_mixed)**2).mean()

w, h = plt.rcParams['figure.figsize']
fig, ax = plt.subplots(1, 2, figsize=(w * 2, h), sharex=True)

ax[0].plot(t, y[:, 0], color=colors[0])
ax[0].plot(t, y[:, 1], color=colors[1])
ax[0].plot(t, ysum, color=colors[2])
if not fixed:
ax[0].plot(t_mixed, result.init_fit, ls='-.', color=colors[2])
ax[0].errorbar(t_mixed, y_mixed, y_mixed_std, fmt='o', color=colors[2])

ax[0].set(xlabel='Time (hour)', ylabel='OD',
xlim=(-0.5, t.max() + 0.5),
title="MRSE: {:.2g}".format(MRSE))

ax[1].plot(t, p1, color=colors[0])
ax[1].plot(t, p2, color=colors[1])

ax[1].set(xlabel='Time (hour)', ylabel='Frequency',
xlim=(-0.5, t.max() + 0.5), ylim=(0, 1),
title="a1={:.2g}, a2={:.2g}".format(*a))

sns.despine()
fig.tight_layout()
return t, y, a, fig, ax
return t, y, a

def fit_and_compete_ci(param_samples1, param_samples2, df_mixed, y0=None, ci=0.95,
colors=('g', 'r'), line_kws=None, ci_kws=None, ax=None, PLOT=False):
if line_kws is None:
line_kws = dict()
if ci_kws is None:
ci_kws = dict(color='gray', alpha=0.5)
nsamples = param_samples1.shape[0]
assert param_samples1.shape[0] == param_samples2.shape[0], "Parameters samples should have the same length"
t = [None] * nsamples
y = [None] * nsamples
a = [None] * nsamples
for i in range(nsamples):
sample1 = param_samples1.iloc[i, :]
sample2 = param_samples2.iloc[i, :]
t[i], y[i], a[i] = curveball.competitions.fit_and_compete(sample1, sample2, df_mixed, y0=y0)

t = np.array(t)
y = np.array(y)
a = np.array(a)
ysum = y.sum(axis=2)
f1 = y[:, :, 0] / ysum
f2 = y[:, :, 1] / ysum
f = np.array((f1, f2)).T

margin = (1.0 - ci) * 50.0

low_f = np.percentile(f, margin, axis=1)
high_f = np.percentile(f, ci * 100.0 + margin, axis=1)
avg_f = f.mean(axis=1)

low_a = np.percentile(a, margin, axis=0)
avg_a = a.mean(axis=0)
high_a = np.percentile(a, ci * 100.0 + margin, axis=0)

if PLOT:
if ax is None:
fig, ax = plt.subplots()
else:
fig = ax.figure
ax.fill_between(t[0, :], low_f[:, 0], high_f[:, 0], **ci_kws)
ax.plot(t[0,:], avg_f[:, 0], color=colors[0], **line_kws)

ax.fill_between(t[0, :], low_f[:, 1], high_f[:, 1], **ci_kws)
ax.plot(t[0,:], avg_f[:, 1], color=colors[1], **line_kws)

ax.set(xlabel='Time', ylabel='Frequency')
fig.tight_layout()
sns.despine()
return low_a, avg_a, high_a, low_f, avg_f, high_f, fig, ax

return low_a, avg_a, high_a, low_f, avg_f, high_f