Skip to content

Commit

Permalink
per-rate scaling in protein and generic kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
amkozlov committed May 22, 2017
1 parent dbf05ed commit 3b5e9bf
Show file tree
Hide file tree
Showing 12 changed files with 1,278 additions and 204 deletions.
43 changes: 43 additions & 0 deletions src/core_derivatives.c
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,46 @@ PLL_EXPORT int pll_core_update_sumtable_ti(unsigned int states,
attrib);
}

unsigned int min_scaler;
unsigned int * rate_scalings = NULL;
int per_rate_scaling = (attrib & PLL_ATTRIB_RATE_SCALERS) ? 1 : 0;

/* powers of scale threshold for undoing the scaling */
double scale_minlh[PLL_SCALE_RATE_MAXDIFF];
if (per_rate_scaling)
{
rate_scalings = (unsigned int*) calloc(rate_cats, sizeof(unsigned int));

double scale_factor = 1.0;
for (i = 0; i < PLL_SCALE_RATE_MAXDIFF; ++i)
{
scale_factor *= PLL_SCALE_THRESHOLD;
scale_minlh[i] = scale_factor;
}
}

/* build sumtable: non-vectorized version, general case */
for (n = 0; n < sites; n++)
{
if (per_rate_scaling)
{
/* compute minimum per-rate scaler -> common per-site scaler */
min_scaler = UINT_MAX;
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = (parent_scaler) ? parent_scaler[n*rate_cats+i] : 0;
if (rate_scalings[i] < min_scaler)
min_scaler = rate_scalings[i];
}

/* compute relative capped per-rate scalers */
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = PLL_MIN(rate_scalings[i] - min_scaler,
PLL_SCALE_RATE_MAXDIFF);
}
}

for (i = 0; i < rate_cats; ++i)
{
t_eigenvecs = eigenvecs[i];
Expand All @@ -393,12 +430,18 @@ PLL_EXPORT int pll_core_update_sumtable_ti(unsigned int states,
tipstate >>= 1;
}
sum[j] = lefterm * righterm;

if (rate_scalings && rate_scalings[i] > 0)
sum[j] *= scale_minlh[rate_scalings[i]-1];
}
t_clvc += states_padded;
sum += states_padded;
}
}

if (rate_scalings)
free(rate_scalings);

return PLL_SUCCESS;
}

Expand Down
125 changes: 124 additions & 1 deletion src/core_derivatives_avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ static int core_update_sumtable_ii_4x4_avx(unsigned int sites,

unsigned int states = 4;

/* scaling stuff*/
unsigned int min_scaler = 0;
unsigned int * rate_scalings = NULL;
int per_rate_scaling = (attrib & PLL_ATTRIB_RATE_SCALERS) ? 1 : 0;
Expand All @@ -55,6 +56,13 @@ static int core_update_sumtable_ii_4x4_avx(unsigned int sites,
{
rate_scalings = (unsigned int*) calloc(rate_cats, sizeof(unsigned int));

if (!rate_scalings)
{
pll_errno = PLL_ERROR_MEM_ALLOC;
snprintf (pll_errmsg, 200, "Cannot allocate memory for rate scalers");
return PLL_FAILURE;
}

double scale_factor = 1.0;
for (i = 0; i < PLL_SCALE_RATE_MAXDIFF; ++i)
{
Expand Down Expand Up @@ -238,6 +246,32 @@ PLL_EXPORT int pll_core_update_sumtable_ii_avx(unsigned int states,

unsigned int states_padded = (states+3) & 0xFFFFFFFC;

/* scaling stuff */
unsigned int min_scaler = 0;
unsigned int * rate_scalings = NULL;
int per_rate_scaling = (attrib & PLL_ATTRIB_RATE_SCALERS) ? 1 : 0;

/* powers of scale threshold for undoing the scaling */
__m256d v_scale_minlh[PLL_SCALE_RATE_MAXDIFF];
if (per_rate_scaling)
{
rate_scalings = (unsigned int*) calloc(rate_cats, sizeof(unsigned int));

if (!rate_scalings)
{
pll_errno = PLL_ERROR_MEM_ALLOC;
snprintf (pll_errmsg, 200, "Cannot allocate memory for rate scalers");
return PLL_FAILURE;
}

double scale_factor = 1.0;
for (i = 0; i < PLL_SCALE_RATE_MAXDIFF; ++i)
{
scale_factor *= PLL_SCALE_THRESHOLD;
v_scale_minlh[i] = _mm256_set1_pd(scale_factor);
}
}

/* padded eigenvecs */
double * tt_eigenvecs = (double *) pll_aligned_alloc (
(states_padded * states_padded * rate_cats) * sizeof(double),
Expand Down Expand Up @@ -282,6 +316,26 @@ PLL_EXPORT int pll_core_update_sumtable_ii_avx(unsigned int states,
/* vectorized loop from update_sumtable() */
for (n = 0; n < sites; n++)
{
/* compute per-rate scalers and obtain minimum value (within site) */
if (per_rate_scaling)
{
min_scaler = UINT_MAX;
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = (parent_scaler) ? parent_scaler[n*rate_cats+i] : 0;
rate_scalings[i] += (child_scaler) ? child_scaler[n*rate_cats+i] : 0;
if (rate_scalings[i] < min_scaler)
min_scaler = rate_scalings[i];
}

/* compute relative capped per-rate scalers */
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = PLL_MIN(rate_scalings[i] - min_scaler,
PLL_SCALE_RATE_MAXDIFF);
}
}

const double * c_eigenvecs = tt_eigenvecs;
const double * ct_inv_eigenvecs = tt_inv_eigenvecs;
for (i = 0; i < rate_cats; ++i)
Expand Down Expand Up @@ -381,6 +435,13 @@ PLL_EXPORT int pll_core_update_sumtable_ii_avx(unsigned int states,

/* update sum */
__m256d v_prod = _mm256_mul_pd (v_lefterm_sum, v_righterm_sum);

/* apply per-rate scalers */
if (rate_scalings && rate_scalings[i] > 0)
{
v_prod = _mm256_mul_pd(v_prod, v_scale_minlh[rate_scalings[i]-1]);
}

_mm256_store_pd (sum + j, v_prod);
}

Expand All @@ -392,6 +453,8 @@ PLL_EXPORT int pll_core_update_sumtable_ii_avx(unsigned int states,

pll_aligned_free (tt_inv_eigenvecs);
pll_aligned_free (tt_eigenvecs);
if (rate_scalings)
free(rate_scalings);

return PLL_SUCCESS;
}
Expand Down Expand Up @@ -426,6 +489,13 @@ static int core_update_sumtable_ti_4x4_avx(unsigned int sites,
{
rate_scalings = (unsigned int*) calloc(rate_cats, sizeof(unsigned int));

if (!rate_scalings)
{
pll_errno = PLL_ERROR_MEM_ALLOC;
snprintf (pll_errmsg, 200, "Cannot allocate memory for rate scalers");
return PLL_FAILURE;
}

double scale_factor = 1.0;
for (i = 0; i < PLL_SCALE_RATE_MAXDIFF; ++i)
{
Expand Down Expand Up @@ -568,7 +638,6 @@ static int core_update_sumtable_ti_4x4_avx(unsigned int sites,

pll_aligned_free(eigenvecs_trans);
pll_aligned_free(precomp_left);

if (rate_scalings)
free(rate_scalings);

Expand Down Expand Up @@ -614,6 +683,32 @@ PLL_EXPORT int pll_core_update_sumtable_ti_avx(unsigned int states,
const double * t_clvc = parent_clv;
const double * t_eigenvecs_padded;

/* scaling stuff */
unsigned int min_scaler = 0;
unsigned int * rate_scalings = NULL;
int per_rate_scaling = (attrib & PLL_ATTRIB_RATE_SCALERS) ? 1 : 0;

/* powers of scale threshold for undoing the scaling */
__m256d v_scale_minlh[PLL_SCALE_RATE_MAXDIFF];
if (per_rate_scaling)
{
rate_scalings = (unsigned int*) calloc(rate_cats, sizeof(unsigned int));

if (!rate_scalings)
{
pll_errno = PLL_ERROR_MEM_ALLOC;
snprintf (pll_errmsg, 200, "Cannot allocate memory for rate scalers");
return PLL_FAILURE;
}

double scale_factor = 1.0;
for (i = 0; i < PLL_SCALE_RATE_MAXDIFF; ++i)
{
scale_factor *= PLL_SCALE_THRESHOLD;
v_scale_minlh[i] = _mm256_set1_pd(scale_factor);
}
}

double * eigenvecs_padded = (double *) pll_aligned_alloc (
(states_padded * states_padded * rate_cats) * sizeof(double),
PLL_ALIGNMENT_AVX);
Expand Down Expand Up @@ -688,6 +783,25 @@ PLL_EXPORT int pll_core_update_sumtable_ti_avx(unsigned int states,
/* build sumtable */
for (n = 0; n < sites; n++)
{
/* compute per-rate scalers and obtain minimum value (within site) */
if (per_rate_scaling)
{
min_scaler = UINT_MAX;
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = (parent_scaler) ? parent_scaler[n*rate_cats+i] : 0;
if (rate_scalings[i] < min_scaler)
min_scaler = rate_scalings[i];
}

/* compute relative capped per-rate scalers */
for (i = 0; i < rate_cats; ++i)
{
rate_scalings[i] = PLL_MIN(rate_scalings[i] - min_scaler,
PLL_SCALE_RATE_MAXDIFF);
}
}

tipstate = (unsigned int) left_tipchars[n];

unsigned int loffset = tipstate * span;
Expand Down Expand Up @@ -757,6 +871,13 @@ PLL_EXPORT int pll_core_update_sumtable_ti_avx(unsigned int states,
__m256d v_lefterm = _mm256_load_pd(t_precomp + j);

__m256d v_sum = _mm256_mul_pd(v_lefterm, v_righterm);

/* apply per-rate scalers */
if (rate_scalings && rate_scalings[i] > 0)
{
v_sum = _mm256_mul_pd(v_sum, v_scale_minlh[rate_scalings[i]-1]);
}

_mm256_store_pd(sum + j, v_sum);
}

Expand All @@ -768,6 +889,8 @@ PLL_EXPORT int pll_core_update_sumtable_ti_avx(unsigned int states,

pll_aligned_free(eigenvecs_padded);
pll_aligned_free(precomp_left);
if (rate_scalings)
free(rate_scalings);

return PLL_SUCCESS;
}
Expand Down
Loading

0 comments on commit 3b5e9bf

Please sign in to comment.