diff --git a/doc/images/sm_scatter-2.png b/doc/images/sm_scatter-2.png
index a8eecbd..8d974c5 100644
Binary files a/doc/images/sm_scatter-2.png and b/doc/images/sm_scatter-2.png differ
diff --git a/starvine/bvcopula/copula/clayton_copula.py b/starvine/bvcopula/copula/clayton_copula.py
index a34315e..eabddb0 100644
--- a/starvine/bvcopula/copula/clayton_copula.py
+++ b/starvine/bvcopula/copula/clayton_copula.py
@@ -14,11 +14,11 @@ class ClaytonCopula(CopulaBase):
\f]
"""
def __init__(self, rotation=0, init_params=None):
+ super(ClaytonCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((1e-9, np.inf),)
self.theta0 = (1.0, )
self.rotation = rotation
self.name = 'clayton'
- super(ClaytonCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
diff --git a/starvine/bvcopula/copula/copula_base.py b/starvine/bvcopula/copula/copula_base.py
index 23b8a1e..d2f1475 100644
--- a/starvine/bvcopula/copula/copula_base.py
+++ b/starvine/bvcopula/copula/copula_base.py
@@ -4,6 +4,7 @@
# H function.
from __future__ import print_function, absolute_import, division
import numpy as np
+import six, abc
import scipy.integrate as spi
from scipy.optimize import bisect, newton, brentq
from scipy.optimize import minimize
@@ -12,6 +13,7 @@
warnings.filterwarnings('ignore')
+@six.add_metaclass(abc.ABCMeta)
class CopulaBase(object):
"""!
@brief Bivariate Copula base class. Meant to be subclassed
@@ -21,23 +23,85 @@ class CopulaBase(object):
Copula can be rotated by 90, 180, 270 degrees to accommodate
negative dependence.
"""
- def __init__(self, rotation=0, **kwargs):
+ def __init__(self, rotation=0, thetaBounds=((-np.inf, np.inf),),
+ theta0=(0.0,), name='defaut', **kwargs):
"""!
@brief Init copula
- @param rotation int Copula orientation
+ @param rotation int Copula orientation in [0, 1, 2, 3]
+ @param thetaBounds tuple of tuples should have len = len(theta0).
+ provides upper and lower bounds for each copula parameter
+ @param theta0 tuple initial parameter guesses
+ @name str name of coupula
"""
+ self._thetaBounds = thetaBounds
self.rotation = rotation
+ self.theta0 = theta0
+ self.name = name
self._fittedParams = kwargs.pop("params", None)
@property
def fittedParams(self):
+ """!
+ @brief Fitted parameters from MLE or MCMC
+ """
return self._fittedParams
@fittedParams.setter
def fittedParams(self, fp):
- # TODO: bounds check input fp!
+ if not self._bounds_check(*fp):
+ raise RuntimeError("Fitted params not in bounds.")
self._fittedParams = fp
+ @property
+ def theta0(self):
+ """!
+ @brief Initial parameter guess
+ """
+ return self._theta0
+
+ @theta0.setter
+ def theta0(self, theta):
+ if not self._bounds_check(*theta):
+ raise RuntimeError("Theta_0 not in bounds.")
+ self._theta0 = theta
+
+ @property
+ def thetaBounds(self):
+ """!
+ @brief Copula parameter bounds
+ """
+ return self._thetaBounds
+
+ @thetaBounds.setter
+ def thetaBounds(self, bounds):
+ """!
+ @brief Copula parameter bounds
+ """
+ self._thetaBounds = bounds
+
+ @property
+ def rotation(self):
+ """!
+ @brief Copula orientation
+ """
+ return self._rotation
+
+ @rotation.setter
+ def rotation(self, rot):
+ assert rot in [0, 1, 2, 3]
+ self._rotation = rot
+
+ @property
+ def name(self):
+ """!
+ @brief Copula name
+ """
+ return self._name
+
+ @name.setter
+ def name(self, nm_str):
+ self._name = nm_str
+
# ---------------------------- PUBLIC METHODS ------------------------------ #
def cdf(self, u, v, *theta):
"""!
@@ -75,9 +139,29 @@ def fitMcmc(self, u, v, *theta0, **kwargs):
@param theta0 Initial guess for copula parameter list
@return tuple :
(np_array Array of MLE fit copula parameters,
- int Fitting success flag, 1==success)
+ np_2darray sample array of shape (nparams, nsamples))
"""
- pass
+ from emcee import EnsembleSampler
+ wgts = kwargs.pop("weights", np.ones(len(u)))
+ rotation = 0
+ ln_prob = lambda theta: self._ln_prior(*theta, **kwargs) + \
+ self._ln_like(u, v, wgts, rotation, *theta)
+ if None in theta0:
+ params0 = self.theta0
+ else:
+ params0 = theta0
+ ndim = len(params0)
+ ngen = kwargs.get("ngen", 200)
+ nburn = kwargs.get("nburn", 100)
+ nwalkers = kwargs.get("nwalkers", 50)
+ # initilize walkers in gaussian ball around theta0
+ pos_0 = [np.array(params0) + 1e-6 * np.asarray(params0)*np.random.randn(ndim) for i in range(nwalkers)]
+ emcee_mcmc = EnsembleSampler(nwalkers, ndim, ln_prob)
+ emcee_mcmc.run_mcmc(pos_0, ngen)
+ samples = emcee_mcmc.chain[:, nburn:, :].reshape((-1, ndim))
+ res = np.mean(samples, axis=0)
+ self._fittedParams = res
+ return res, samples
def fitMLE(self, u, v, *theta0, **kwargs):
"""!
@@ -157,8 +241,7 @@ def setRotation(self, rotation=0):
"""!
@brief Set the copula's orientation:
Allows for modeling negative dependence with the
- frank, gumbel, and clayton copulas (Archimedean Copula family is
- non-symmetric)
+ frank, gumbel, and clayton copulas
@param rotation int Copula rotation.
0 == 0 deg,
1 == 90 deg rotation,
@@ -168,6 +251,7 @@ def setRotation(self, rotation=0):
self.rotation = rotation
# ---------------------------- PRIVATE METHODS ------------------------------ #
+ @abc.abstractmethod
def _pdf(self, u, v, rotation=0, *theta):
"""!
@brief Pure virtual density function.
@@ -210,6 +294,7 @@ def _ppf(self, u, v, rotation=0, *theta):
v_hat = self._hinv(u_hat, v, rotation, *theta)
return (u_hat, v_hat)
+ @abc.abstractmethod
def _h(self, u, v, rotation=0, *theta):
"""!
@brief Copula conditional distribution function.
@@ -226,6 +311,7 @@ def _h(self, u, v, rotation=0, *theta):
"""
raise NotImplementedError
+ @abc.abstractmethod
def _hinv(self, u, v, rotation=0, *theta):
"""!
@brief Inverse H function.
@@ -259,12 +345,43 @@ def _logLike(self, u, v, wgts=None, rotation=0, *theta):
wgts = np.ones(len(u))
return np.sum(wgts * np.log(self._pdf(u, v, rotation, *theta)))
+ def _ln_like(self, u, v, wgts=None, rotation=0, *theta):
+ """!
+ @brief log likelihood func wrapper. return -np.inf if
+ theta is out of bounds
+ """
+ try:
+ a = self._logLike(u, v, wgts, rotation, *theta)
+ if np.isnan(a):
+ return -np.inf
+ else:
+ return a
+ except:
+ return -np.inf
+
+ def _bounds_check(self, *theta, **kwargs):
+ """!
+ @brief Check if parameters are in bounds.
+ """
+ bounds=kwargs.pop("bounds", self.thetaBounds)
+ assert len(theta) == len(bounds)
+ for i, param in enumerate(theta):
+ if bounds[i][0] < param < bounds[i][1]:
+ pass
+ else:
+ return False
+ return True
+
+ def _ln_prior(self, *theta, **kwargs):
+ in_bounds = self._bounds_check(*theta, **kwargs)
+ if in_bounds:
+ return 0.0
+ else:
+ return -np.inf
+
def _invhfun_bisect(self, U, V, rotation, *theta):
"""!
@brief Compute inverse of H function using bisection.
- Update (11/08/2016) performance improvement: finish with newton iterations
- Update (12/04/2016) Newton can minimization needs bounds checks!
- TODO: move to scipy.minmize
"""
# Apply limiters
U = np.clip(U, 1e-8, 1. - 1e-8)
@@ -307,6 +424,7 @@ def _AIC(self, u, v, rotation=0, *theta, **kwargs):
AICc = AIC + (2. * k ** 2. + 2. * k) / (len(u) - k - 1)
return AICc
+ @abc.abstractmethod
def _gen(self, t, *theta):
"""!
@brief Copula generator function.
@@ -537,9 +655,6 @@ def wrapper(self, *args, **kwargs):
return 1. - f(self, 1. - u, 1. - v, rot, *nargs)
elif self.rotation == 3:
# 270 deg rotation
- # Clayton is flipped!
- # TODO: Fix GUMBEL 270 rotation
- # return 1. - f(self, 1. - u, v, rot, *nargs) # works for gumbel
return 1. - f(self, u, 1. - v, rot, *nargs)
return wrapper
@@ -563,10 +678,13 @@ def icdf_uv_bisect(X, frozen_marginal_model):
"""
@brief Apply marginal model.
@param X np_1darray samples from copula margin (uniform distributed)
- @param frozen_marginal_model function frozen scipy.stats.rv_continuous python function object
- CDF(ux)
- Note: margin CDF model should be a monotonic function with a range in [0, 1]
+ @param frozen_marginal_model frozen scipy.stats.rv_continuous python object
+ or function inverse cdf fuction
+ Note: inv_CDF should be a monotonic function with a domain in [0, 1]
@return np_1darray Samples drawn from supplied margin model
"""
- icdf = frozen_marginal_model.ppf(X)
+ if hasattr(frozen_marginal_model, 'ppf'):
+ icdf = frozen_marginal_model.ppf(X)
+ else:
+ icdf = frozen_marginal_model(X)
return icdf
diff --git a/starvine/bvcopula/copula/frank_copula.py b/starvine/bvcopula/copula/frank_copula.py
index ece9e3d..54369d1 100644
--- a/starvine/bvcopula/copula/frank_copula.py
+++ b/starvine/bvcopula/copula/frank_copula.py
@@ -14,11 +14,11 @@ class FrankCopula(CopulaBase):
\f$\theta \in [0, \infty) \f$
"""
def __init__(self, rotation=0, init_params=None):
+ super(FrankCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((1e-9, np.inf),)
self.theta0 = (1.0,)
self.rotation = rotation
self.name = 'frank'
- super(FrankCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
diff --git a/starvine/bvcopula/copula/gauss_copula.py b/starvine/bvcopula/copula/gauss_copula.py
index 8561954..37bcdfc 100644
--- a/starvine/bvcopula/copula/gauss_copula.py
+++ b/starvine/bvcopula/copula/gauss_copula.py
@@ -19,11 +19,11 @@ def __init__(self, rotation=0, init_params=None):
@param rotation Int. in (0, 1, 2, 3)
@param init_params List of initial copula parameters
"""
+ super(GaussCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((-1 + 1e-9, 1 - 1e-9),)
self.theta0 = (0.7,)
self.name = 'gauss'
self.rotation = rotation
- super(GaussCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
diff --git a/starvine/bvcopula/copula/gumbel_copula.py b/starvine/bvcopula/copula/gumbel_copula.py
index 9a6562a..b1f6d3d 100644
--- a/starvine/bvcopula/copula/gumbel_copula.py
+++ b/starvine/bvcopula/copula/gumbel_copula.py
@@ -12,11 +12,11 @@ class GumbelCopula(CopulaBase):
\f$\theta \in [1, \infty) \f$
"""
def __init__(self, rotation=0, init_params=None):
+ super(GumbelCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((1 + 1e-9, np.inf),)
self.theta0 = (2.0, )
self.rotation = rotation
self.name = 'gumbel'
- super(GumbelCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
@@ -79,22 +79,27 @@ def _hinv(self, v, u, rotation=0, *theta):
"""
U = np.asarray(u)
V = np.asarray(v)
- # uu = np.zeros(U.size)
- # for i, (ui, vi) in enumerate(zip(U, V)):
- # uu[i] = self._invhfun_bisect(ui, vi, rotation, *theta)
- # return uu
+ uu = np.zeros(U.size)
+ for i, (ui, vi) in enumerate(zip(U, V)):
+ uu[i] = self._invhfun_bisect(ui, vi, rotation, *theta)
+ return uu
# Apply rotation
- if rotation == 1:
- U = 1. - U
- elif rotation == 3:
- V = 1. - V
- elif rotation == 0:
- pass
- vv = self.vec_newton_hinv(V, U, theta[0], z_init=np.ones(len(U)))
- if rotation == 1 or rotation == 3:
- return 1. - vv
- else:
- return vv
+
+ # TODO: Fix! Properly vectorize hinv!
+ # TODO: This is causing an issue in c-vine sampling
+ # It is faster but it is incorrect.
+ #if rotation == 1:
+ # U = 1. - U
+ #elif rotation == 3:
+ # V = 1. - V
+ #elif rotation == 0:
+ # pass
+ #vv = self.vec_newton_hinv(V, U, theta[0], z_init=np.ones(len(U)))
+ #if rotation == 1 or rotation == 3:
+ # return 1. - vv
+ #else:
+ # return vv
+ pass
@CopulaBase._rotGen
def _gen(self, t, *theta):
diff --git a/starvine/bvcopula/copula/indep_copula.py b/starvine/bvcopula/copula/indep_copula.py
index 2f9f90f..e009d98 100644
--- a/starvine/bvcopula/copula/indep_copula.py
+++ b/starvine/bvcopula/copula/indep_copula.py
@@ -7,6 +7,7 @@
class IndepCopula(CopulaBase):
def __init__(self, rotation=0):
+ super(IndepCopula, self).__init__(rotation)
self.name = 'indep'
self.theta0 = (1.0, )
self.fittedParams = [1.0]
diff --git a/starvine/bvcopula/copula/marshall_olkin_copula.py b/starvine/bvcopula/copula/marshall_olkin_copula.py
index 9370cf4..1f8da6c 100644
--- a/starvine/bvcopula/copula/marshall_olkin_copula.py
+++ b/starvine/bvcopula/copula/marshall_olkin_copula.py
@@ -14,11 +14,11 @@ class OlkinCopula(CopulaBase):
\f$\theta \in [1, \infty) \f$
"""
def __init__(self, rotation=0, init_params=None):
+ super(OlkinCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((0, 1.), (0, 1.),)
self.theta0 = (0.5, 0.7)
self.rotation = rotation
self.name = 'olkin'
- super(OlkinCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
diff --git a/starvine/bvcopula/copula/t_copula.py b/starvine/bvcopula/copula/t_copula.py
index 2a66e56..3d142f5 100644
--- a/starvine/bvcopula/copula/t_copula.py
+++ b/starvine/bvcopula/copula/t_copula.py
@@ -22,11 +22,11 @@ class StudentTCopula(CopulaBase):
\f$ \theta_1 \in (2, \infty) \f$
"""
def __init__(self, rotation=0, init_params=None):
+ super(StudentTCopula, self).__init__(rotation, params=init_params)
self.thetaBounds = ((-1 + 1e-9, 1 - 1e-9), (2.0, np.inf),)
self.theta0 = (0.7, 10.0)
self.name = 't'
self.rotation = 0
- super(StudentTCopula, self).__init__(rotation, params=init_params)
@CopulaBase._rotPDF
def _pdf(self, u, v, rotation=0, *theta):
@@ -149,6 +149,8 @@ def _kTau(self, rotation=0, *theta):
kt = (2.0 / np.pi) * np.arcsin(theta[0])
return kt
+ def _gen(self, t, *theta):
+ raise NotImplementedError
def ggamma(x):
return np.log(gammaln(x))
diff --git a/starvine/bvcopula/pc_base.py b/starvine/bvcopula/pc_base.py
index f6cc7ff..4c3987e 100644
--- a/starvine/bvcopula/pc_base.py
+++ b/starvine/bvcopula/pc_base.py
@@ -47,7 +47,7 @@ def __init__(self, x, y, weights=None, resample=0, **kwargs):
self.weights = self.weights / np.average(self.weights)
if resample > 0:
self.resample(resample, kwargs.pop("jitter", 1e-12))
- self.setTrialCopula(kwargs.pop("family", self.defaultFamily))
+ self.setTrialCopula(kwargs.pop("family", {}))
# default data ranking method
self.rank_method = kwargs.pop("rankMethod", 0)
self.rank(self.rank_method)
@@ -124,7 +124,9 @@ def rank_method(self, method):
assert type(method) is int
self._rank_method = method
- def setTrialCopula(self, family):
+ def setTrialCopula(self, family={}):
+ if not family:
+ family = self.defaultFamily
self.trialFamily = family
self.copulaBank = {}
for name, rotation in iteritems(self.trialFamily):
@@ -216,7 +218,7 @@ def copulaTournament(self, criterion='AIC', **kwargs):
self.copulaParams = goldParams
return (self.copulaModel, self.copulaParams)
- def fitCopula(self, copula, thetaGuess=(None, None, )):
+ def fitCopula(self, copula, thetaGuess=(None, None,)):
"""!
@brief fit specified copula to data.
@param copula CopulaBase Copula instance
diff --git a/starvine/bvcopula/tests/test_mcmc_fit.py b/starvine/bvcopula/tests/test_mcmc_fit.py
new file mode 100644
index 0000000..24ab957
--- /dev/null
+++ b/starvine/bvcopula/tests/test_mcmc_fit.py
@@ -0,0 +1,44 @@
+##
+# \brief Tests mcmc estimation of copula paramters
+from __future__ import print_function, division
+import unittest
+from scipy.stats.mstats import rankdata
+# COPULA IMPORTS
+from starvine.bvcopula.copula.gauss_copula import GaussCopula as gc
+import numpy as np
+import os
+pwd_ = os.path.dirname(os.path.abspath(__file__))
+dataDir = pwd_ + "/data/"
+np.random.seed(123)
+tol = 4e-2
+
+
+class TestMcmcFit(unittest.TestCase):
+ def testMcmcCoplulaFit(self):
+ print("--------------------- MCMC COPULA FIT TEST --------------------------")
+ # Load matlab data set
+ stocks = np.loadtxt(dataDir + 'stocks.csv', delimiter=',')
+ x = stocks[:, 0]
+ y = stocks[:, 1]
+
+ # Rank transform the data
+ u = rankdata(x) / (len(x) + 1)
+ v = rankdata(y) / (len(y) + 1)
+
+ # Fit t copula and gaussian copula
+ thetag0 = [0.2]
+ g_copula = gc()
+ theta_g_fit_mle = g_copula.fitMLE(u, v, *thetag0, bounds=((-0.99, 0.99),))[0]
+ aic_g_fit_mle = g_copula._AIC(u, v, 0, *theta_g_fit_mle)
+ theta_g_fit_mcmc = g_copula.fitMcmc(u, v, *thetag0, bounds=((-0.99, 0.99),),
+ ngen=500, nburn=200)[0]
+ aic_g_fit_mcmc = g_copula._AIC(u, v, 0, *theta_g_fit_mcmc)
+ print("Gaussian copula MLE paramter [rho]: " + str(theta_g_fit_mle) + " AIC =" + str(aic_g_fit_mle))
+ print("Gaussian copula MCMC paramter [rho]: " + str(theta_g_fit_mcmc) + " AIC =" + str(aic_g_fit_mcmc))
+
+ # check MLE and MCMC solution are the same in this case
+ self.assertAlmostEqual(theta_g_fit_mle[0], theta_g_fit_mcmc[0], delta=tol)
+
+ # check againt expected
+ true_rho_ranked = 0.7387
+ self.assertAlmostEqual(theta_g_fit_mcmc[0], true_rho_ranked, delta=tol)
diff --git a/starvine/sklearn_vine.py b/starvine/sklearn_vine.py
new file mode 100644
index 0000000..2e80a87
--- /dev/null
+++ b/starvine/sklearn_vine.py
@@ -0,0 +1,59 @@
+##
+# @brief Pipeline interface for using with sklearn pipelines
+##
+from __future__ import print_function, absolute_import, division
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator
+from starvine.vine.C_vine import Cvine
+
+
+class VineModel(BaseEstimator):
+ """!
+ @brief Implements the fit, fit_transform interface of the sklearn
+ pipeline workflow for vine copula.
+ """
+ def __init__(self, vine_type='c',
+ trial_copula={},
+ copula_fit_method='mle',
+ vine_fit_method='ktau',
+ rank_transform=True):
+ self.trial_copula_dict = trial_copula
+ self._rank_transform = rank_transform
+ if vine_type == 'c':
+ self._vine = Cvine([])
+ else:
+ raise NotImplementedError
+
+ def fit(self, X, weights=None):
+ """
+ @brief Fit vine copula model to data.
+ @param X corrolated vectors with shape (Nsamples, Ndim). Can be
+ unranked or ranked data
+ """
+ if not isinstance(X, pd.DataFrame):
+ tstData = pd.DataFrame(X)
+ else:
+ tstData = X
+ if self.rank_transform:
+ x_r = tstData.dropna().rank()/(len(tstData)+1)
+ else:
+ x_r = tstData
+ self.vine = Cvine(x_r, trial_copula=self.trial_copula_dict)
+ self.vine.constructVine()
+
+ def predict(self, n):
+ """!
+ @brief Predict correlated output vectors given uncorrolated inputs.
+ @param n int. Number of samples to draw.
+ """
+ return self.vine.sample(n)
+
+ @property
+ def rank_transform(self):
+ return self._rank_transform
+
+ @rank_transform.setter
+ def rank_transform(self, rt):
+ self._rank_transform = bool(rt)
+
diff --git a/starvine/vine/C_vine.py b/starvine/vine/C_vine.py
index 775196c..fe66d52 100644
--- a/starvine/vine/C_vine.py
+++ b/starvine/vine/C_vine.py
@@ -12,12 +12,13 @@
import networkx as nx
from starvine.bvcopula import pc_base as pc
import numpy as np
+from six import iteritems
from starvine.vine.tree import Vtree
class Cvine(BaseVine):
"""!
- @brief Cononical vine (C-vine). Provides methods to fit pair
+ @brief Canonical vine (C-vine). Provides methods to fit pair
copula constructions sequentially and simultaneously.
Additional methods are provided to draw samples from a
constructed C-vine.
@@ -53,7 +54,7 @@ class Cvine(BaseVine):
\f[ h(x| v, \theta) = F(x|v, \theta) = \frac{\partial C_{xv}(x,v,\theta)}{\partial v} \f]
- Where we have defined the convinience conditional distribution
+ Where we have defined, for convinience, the conditional distribution:
\f$ h(\cdot) \f$
The nodes of the lower level trees are formed by using \f$ h(x| v,\theta) \f$
@@ -71,9 +72,8 @@ class Cvine(BaseVine):
and the inner product indicies represent
the pair copula constructions (PCC) withen the given tree.
"""
- def __init__(self, data, dataWeights=None):
- self.data = data
- self.weights = dataWeights
+ def __init__(self, data, dataWeights=None, **kwargs):
+ super(Cvine, self).__init__(data, dataWeights, **kwargs)
self.nLevels = int(data.shape[1] - 1)
self.vine = []
@@ -84,7 +84,7 @@ def constructVine(self):
build all tree levels.
"""
# 0th tree build
- tree0 = Ctree(self.data, lvl=0)
+ tree0 = Ctree(self.data, lvl=0, trial_copula=self.trial_copula_dict)
tree0.seqCopulaFit()
self.vine.append(tree0)
# build all other trees
@@ -98,7 +98,8 @@ def buildDeepTrees(self, level=1):
"""
treeT = Ctree(self.vine[level - 1].evalH(),
lvl=level,
- parentTree=self.vine[level - 1])
+ parentTree=self.vine[level - 1],
+ trial_copula=self.trial_copula_dict)
treeT.seqCopulaFit()
if self.nLevels > 1:
self.vine.append(treeT)
@@ -111,7 +112,7 @@ def buildDeepTrees(self, level=1):
class Ctree(Vtree):
"""!
@brief A C-tree is a tree with a single root node.
- Each level of a cononical vine is a C-tree.
+ Each level of a canonical vine is a C-tree.
"""
def __init__(self, data, lvl=None, **kwargs):
"""!
@@ -139,7 +140,7 @@ def seqCopulaFit(self):
def treeNLLH(self, treeCopulaParams=None):
"""!
@brief Compute this tree's negative log likelihood.
- For C-trees this is just the sum of copula-log-likeyhoods over all
+ For C-trees this is just the sum of copula-log-likelihoods over all
node-pairs.
@param treeCopulaParams np_1darray Copula parameter array.
Contains parameters for all PCC in the tree.
@@ -195,16 +196,14 @@ def _optimNodePairs(self):
# iterate though all child nodes,
# root dataset cannot be paired with itself
if nodeID != rootNodeID:
- ## VV is OK UU is wrong!
trialPair = pc.PairCopula(self.tree.node[nodeID]["data"].values,
self.tree.node[rootNodeID]["data"].values)
- # trialPair = pc.PairCopula(self.tree.node[rootNodeID]["data"].values,
- # self.tree.node[nodeID]["data"].values)
trialKtau, trialP = trialPair.empKTau()
trialKtauSum[i] += abs(trialKtau)
- # trialPairings[i].append((rootNodeID, nodeID, trialKtau))
trialPairings[i].append((nodeID, rootNodeID, trialKtau))
+ print("Tree level: %d, configuration: %d, Ktau Metric: %f" % (self.level, i, trialKtauSum[i]))
bestPairingIndex = np.argmax(np.abs(trialKtauSum))
+ print(" === Configuration %d selected === " % (bestPairingIndex))
self.rootNodeID = trialPairings[bestPairingIndex][0][1]
return trialPairings[bestPairingIndex]
@@ -231,8 +230,6 @@ def _evalH(self):
rootData = data["pc"].UU
condData[(nonRootID, rootID)] = data["h-dist"](data["pc"].VV,
data["pc"].UU)
- # condData[(nonRootID, rootID)] = data["h-dist"](nonRootData,
- # rootData)
return condData
def _getEdgeCopulaParams(self, u, v):
diff --git a/starvine/vine/base_vine.py b/starvine/vine/base_vine.py
index 7d402f7..6bbc0d9 100644
--- a/starvine/vine/base_vine.py
+++ b/starvine/vine/base_vine.py
@@ -5,6 +5,7 @@
import networkx as nx
import numpy as np
import pandas as pd
+from six import iteritems
# from starvine.mvar.mv_plot import matrixPairPlot
@@ -12,8 +13,37 @@ class BaseVine(object):
"""!
@brief Regular vine base class.
"""
- def __init__(self, data=None, weights=None):
- pass
+ def __init__(self, data, dataWeights=None, **kwargs):
+ self.trial_copula_dict = \
+ self._validate_trial_copula( \
+ kwargs.get("trial_copula", self._all_trial_copula))
+ self.data = data
+ self.weights = dataWeights
+
+ def _validate_trial_copula(self, trial_copula):
+ assert isinstance(trial_copula, dict)
+ for key, val in iteritems(trial_copula):
+ assert key in self._all_trial_copula
+ assert self._all_trial_copula[key] == val
+
+ @property
+ def _all_trial_copula(self):
+ default_copula = {'t': 0,
+ 'gauss': 0,
+ 'frank': 0,
+ 'frank-90': 1,
+ 'frank-180': 2,
+ 'frank-270': 3,
+ 'clayton': 0,
+ 'clayton-90': 1,
+ 'clayton-180': 2,
+ 'clayton-270': 3,
+ 'gumbel': 0,
+ 'gumbel-90': 1,
+ 'gumbel-180': 2,
+ 'gumbel-270': 3,
+ }
+ return default_copula
def loadVineStructure(self, vS):
"""!
@@ -67,8 +97,9 @@ def treeHfun(self, level=0):
def sample(self, n=1000):
"""!
@brief Draws n samples from the vine.
+ @param n int. number of samples to draw
@returns size == (n, nvars) pandas.DataFrame
- samples from vine
+ samples from vine
"""
# gen random samples
u_n0 = np.random.rand(n)
@@ -81,8 +112,8 @@ def sample(self, n=1000):
# sample from edge of last tree
u_n1 = edge_info["hinv-dist"](u_n0, u_n1)
- edge_sample = {n0: 1. - u_n0, n1: 1. - u_n1}
- # matrixPairPlot(pd.DataFrame(edge_sample), savefig="tree_1_edge_sample.png")
+ edge_sample = {n0: u_n0, n1: u_n1}
+ # matrixPairPlot(pd.DataFrame(edge_sample), savefig="c_test/tree_1_edge_sample.png")
# store edge sample inside graph data struct
current_tree.tree[n0][n1]['sample'] = edge_sample
@@ -117,6 +148,40 @@ def sample(self, n=1000):
# convert sample dict of arrays to dataFrame
return pd.DataFrame(sample_result)
+ def sampleScale(self, n, frozen_margin_dict):
+ """!
+ @brief Sample vine copula and apply inverse transform sampling
+ to margins.
+ @param n int. number of samples to draw.
+ @param frozen_margin_dict dict of frozen single dimensional
+ prob density functions. See: scipy.stats.rv_continuous
+ """
+ df_x = self.sample(n)
+ return self.scaleSamples(df_x, frozen_margin_dict)
+
+ def scaleSamples(self, df_x, frozen_margin_dict):
+ """!
+ @brief Apply inverse transform sampling
+ @param df_x corrolated samples in [0, 1] from vine copula
+ @param frozen_margin_dict dict of frozen single dimensional
+ prob density functions. See: scipy.stats.rv_continuous.
+ Each entry in the dict is an instance of:
+ frozen_marginal_model frozen scipy.stats.rv_continuous
+ python object
+ or is a inverse cdf fuction
+ """
+ sample_result = {}
+ assert isinstance(df_x, pd.DataFrame)
+ for col_name in df_x.columns:
+ assert col_name in frozen_margin_dict.keys()
+ frozen_marginal_model = frozen_margin_dict[col_name]
+ x = df_x[col_name]
+ if hasattr(frozen_marginal_model, 'ppf'):
+ sample_result[col_name] = frozen_marginal_model.ppf(x)
+ else:
+ sample_result[col_name] = frozen_marginal_model(x)
+ return pd.DataFrame(sample_result)
+
def plotVine(self, plotAll=True, savefig=None):
"""!
@brief Plots the vine's graph structure.
diff --git a/starvine/vine/tree.py b/starvine/vine/tree.py
index 62cf02d..145bd4a 100644
--- a/starvine/vine/tree.py
+++ b/starvine/vine/tree.py
@@ -7,7 +7,6 @@
from starvine.bvcopula import pc_base as pc
import networkx as nx
import numpy as np
-# from starvine.mvar.mv_plot import matrixPairPlot
class Vtree(object):
@@ -22,6 +21,7 @@ def __init__(self, data, lvl, parentTree=None, **kwargs):
"""
assert(type(data) is DataFrame)
assert(len(data.shape) == 2)
+ self.trial_copula_dict = kwargs.get("trial_copula", {})
self.data = data
self._upperTree = parentTree
#
@@ -63,7 +63,8 @@ def setEdges(self, nodePairs=None):
pc= \
pc.PairCopula(self.tree.node[pair[0]]["data"],
self.tree.node[pair[1]]["data"],
- id=(pair[0], pair[1])),
+ id=(pair[0], pair[1]),
+ family=self.trial_copula_dict),
id=(pair[0], pair[1]),
edge_data={pair[0]: self.tree.node[pair[0]]["data"],
pair[1]: self.tree.node[pair[1]]["data"]},
@@ -230,7 +231,6 @@ def unrollNodes(l):
u_prev_n0 = prev_edge_info['sample'][prev_n0]
u_prev_n2 = prev_edge_info['sample'][prev_n2]
u_n1 = prev_edge_info["h-dist"](u_prev_n2, u_prev_n0)
- # u_n1 = prev_edge_info["h-dist"](u_prev_n0, u_prev_n2)
else:
u_n1 = np.random.rand(size)
else:
@@ -240,10 +240,9 @@ def unrollNodes(l):
try:
u_n0 = edge_info["hinv-dist"](u_n1, next_tree_info['sample'][(n0, n1)])
except:
- u_n0 = edge_info["hinv-dist"](u_n1, next_tree_info['sample'][(n1, n0)])
- edge_sample = {n0: 1. - u_n0, n1: 1. - u_n1}
- # matrixPairPlot(DataFrame(edge_sample),
- # savefig="edge" + str(n0) + "_" + str(n1) + "sample.png")
+ # u_n0 = edge_info["hinv-dist"](u_n1, next_tree_info['sample'][(n1, n0)])
+ raise RuntimeError("Edge with nodes: " + str((n0, n1)), " does not exist.")
+ edge_sample = {n0: u_n0, n1: u_n1}
current_tree[n0][n1]['sample'] = edge_sample
# If current tree is 0th tree: copy marginal sample to
diff --git a/tests/test_C_vine_construct.py b/tests/test_C_vine_construct.py
index e226284..d9ab37a 100644
--- a/tests/test_C_vine_construct.py
+++ b/tests/test_C_vine_construct.py
@@ -5,6 +5,7 @@
from starvine.vine.C_vine import Cvine
from starvine.mvar.mv_plot import matrixPairPlot
# extra imports
+from scipy.stats import norm, beta
import unittest
import os
import numpy as np
@@ -46,8 +47,47 @@ def testCvineConstruct(self):
tstVine.plotVine(savefig="c_vine_graph_ex.png")
# sample from vine
- samples = tstVine.sample(n=8000)
- matrixPairPlot(samples, savefig="quad_varaite_resampled_ex.png")
+ c_vine_samples = tstVine.sample(n=8000)
+ matrixPairPlot(c_vine_samples, savefig="vine_resampled_ex.png")
+
+ # check that the original data has same correlation coefficients as re-sampled
+ # data from the fitted c-vine
+ tst_rho_matrix = ranked_data.corr(method='pearson')
+ tst_ktau_matrix = ranked_data.corr(method='kendall')
+ sample_rho_matrix = c_vine_samples.corr(method='pearson')
+ sample_ktau_matrix = c_vine_samples.corr(method='kendall')
+ # sort by col labels
+ tst_rho_matrix = tst_rho_matrix.reindex(sorted(tst_rho_matrix.columns), axis=1)
+ tst_ktau_matrix = tst_ktau_matrix.reindex(sorted(tst_ktau_matrix.columns), axis=1)
+ sample_rho_matrix = sample_rho_matrix.reindex(sorted(sample_rho_matrix.columns), axis=1)
+ sample_ktau_matrix = sample_ktau_matrix.reindex(sorted(sample_ktau_matrix.columns), axis=1)
+
+ print("Original data corr matrix:")
+ print(tst_rho_matrix)
+ print("Vine sample corr matrix:")
+ print(sample_rho_matrix)
+ print("Diff:")
+ print(tst_rho_matrix - sample_rho_matrix)
+ self.assertTrue(np.allclose(tst_rho_matrix - sample_rho_matrix, 0, atol=0.10))
+ self.assertTrue(np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0, atol=0.10))
+
+ # fit marginal distributions to original data
+ marginal_dict = {}
+ for col_name in tstData.columns:
+ marginal_dict[col_name] = beta(*beta.fit(tstData[col_name]))
+ # scale the samples
+ c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples, marginal_dict)
+ matrixPairPlot(c_vine_scaled_samples_a, savefig="vine_varaite_resampled_scaled_a.png")
+
+ c_vine_scaled_samples_b = tstVine.sampleScale(8000, marginal_dict)
+
+ # compute correlation coeffs
+ sample_scaled_rho_matrix_a = c_vine_scaled_samples_a.corr(method='pearson')
+ sample_scaled_rho_matrix_b = c_vine_scaled_samples_b.corr(method='pearson')
+
+ # check for consistency
+ self.assertTrue(np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_a, 0, atol=0.1))
+ self.assertTrue(np.allclose(tst_rho_matrix - sample_scaled_rho_matrix_b, 0, atol=0.1))
if __name__ == "__main__":
diff --git a/tests/test_weighted_reg.py b/tests/test_weighted_reg.py
index ee8eb08..7b5b7ec 100644
--- a/tests/test_weighted_reg.py
+++ b/tests/test_weighted_reg.py
@@ -79,13 +79,6 @@ def testWgtCopula(self):
data = pd.DataFrame([x_wt, y_wt]).T
matrixPairPlot(data, savefig='x_gauss_weighted_fit.png')
- def testWgtMargins(self):
- """!
- @brief Test the ability to construct a marginal PDF
- given samples with unequal weights.
- """
- pass
-
def testWgtResampledCopula(self):
"""!
@brief Test ability to construct copula