diff --git a/test/out/scaling.out b/test/out/scaling.out
new file mode 100644
index 0000000..11d335c
--- /dev/null
+++ b/test/out/scaling.out
@@ -0,0 +1,64 @@
+scaling = per-site, alpha = 0.050000, rates = [ 0.000000 0.000001 0.005299 3.994700 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 8 6 21 21 4 ]
+per-site logLH: [ -1565.4625 -1079.1468 -inf -inf -812.9101 ]
+logLH INNER-INNER at edge (3997-3995): -inf, LH derivatives: -nan, -nan
+logLH TIP-INNER at edge (3997-1999): -inf, LH derivatives: -nan, -nan
+
+scaling = per-rate, alpha = 0.050000, rates = [ 0.000000 0.000001 0.005299 3.994700 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 8+(50 22 7 0) 6+(50 25 9 0) 15+(48 16 0 5) 15+(47 16 0 5) 4+(50 27 11 0) ]
+per-site logLH: [ -1565.4625 -1079.1468 -2838.4636 -2813.7556 -812.9101 ]
+logLH INNER-INNER at edge (3997-3995): -9109.7386, LH derivatives: -2.7826, 2.9738
+logLH TIP-INNER at edge (3997-1999): -9109.7386, LH derivatives: -1.5936, 1.2950
+
+scaling = per-site, alpha = 0.200000, rates = [ 0.000531 0.033775 0.383658 3.582035 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 8 6 21 21 4 ]
+per-site logLH: [ -1567.6326 -1076.7894 -inf -inf -827.4514 ]
+logLH INNER-INNER at edge (3997-3995): -inf, LH derivatives: -nan, -nan
+logLH TIP-INNER at edge (3997-1999): -inf, LH derivatives: -nan, -nan
+
+scaling = per-rate, alpha = 0.200000, rates = [ 0.000531 0.033775 0.383658 3.582035 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 8+(11 3 1 0) 6+(14 6 1 0) 12+(8 0 2 8) 12+(7 0 2 8) 4+(15 7 3 0) ]
+per-site logLH: [ -1567.6326 -1076.7894 -2277.3326 -2279.8917 -827.4514 ]
+logLH INNER-INNER at edge (3997-3995): -8029.0976, LH derivatives: -2.9702, 3.0540
+logLH TIP-INNER at edge (3997-1999): -8029.0976, LH derivatives: -1.6910, 1.3293
+
+scaling = per-site, alpha = 2.000000, rates = [ 0.293275 0.655014 1.069990 1.981722 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 9 6 13 13 5 ]
+per-site logLH: [ -1603.6166 -1100.1947 -2388.7163 -2399.8925 -913.5515 ]
+logLH INNER-INNER at edge (3997-3995): -8405.9717, LH derivatives: -3.0736, 3.5362
+logLH TIP-INNER at edge (3997-1999): -8405.9717, LH derivatives: -1.4613, 1.3836
+
+scaling = per-rate, alpha = 2.000000, rates = [ 0.293275 0.655014 1.069990 1.981722 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 9+(0 0 0 0) 6+(2 1 0 0) 13+(0 4 6 7) 13+(0 4 6 7) 5+(2 1 0 0) ]
+per-site logLH: [ -1603.6166 -1100.1947 -2388.7163 -2399.8925 -913.5515 ]
+logLH INNER-INNER at edge (3997-3995): -8405.9717, LH derivatives: -3.0736, 3.5362
+logLH TIP-INNER at edge (3997-1999): -8405.9717, LH derivatives: -1.4613, 1.3836
+
+scaling = per-site, alpha = 99.000000, rates = [ 0.875297 0.964547 1.029683 1.130473 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 9 6 18 18 5 ]
+per-site logLH: [ -1658.3038 -1166.0014 -3313.6724 -3314.2351 -1008.3317 ]
+logLH INNER-INNER at edge (3997-3995): -10460.5445, LH derivatives: -2.4820, 3.3850
+logLH TIP-INNER at edge (3997-1999): -10460.5445, LH derivatives: -1.0708, 1.1693
+
+scaling = per-rate, alpha = 99.000000, rates = [ 0.875297 0.964547 1.029683 1.130473 ]
+recompute P-matrices: 3997
+recompute CLVs: 1998
+scaler 1995: [ 9+(0 0 0 0) 6+(0 0 0 0) 18+(0 1 1 1) 18+(0 1 1 1) 5+(0 0 0 0) ]
+per-site logLH: [ -1658.3038 -1166.0014 -3313.6724 -3314.2351 -1008.3317 ]
+logLH INNER-INNER at edge (3997-3995): -10460.5445, LH derivatives: -2.4820, 3.3850
+logLH TIP-INNER at edge (3997-1999): -10460.5445, LH derivatives: -1.0708, 1.1693
+
diff --git a/test/src/README.md b/test/src/README.md
index b395f9d..db63f96 100644
--- a/test/src/README.md
+++ b/test/src/README.md
@@ -95,6 +95,10 @@ and 4 different proportions of invariant sites, from 0.0 to 0.9
Validate Nearest Neighbor Interchange moves.
+## scaling
+
+Validate CLV scaling on large trees (per-site and per-rate scaling modes)
+
## treemove-spr
Validate Subtree Prunning and Regrafting moves.
diff --git a/test/src/scaling.c b/test/src/scaling.c
new file mode 100644
index 0000000..00a3f78
--- /dev/null
+++ b/test/src/scaling.c
@@ -0,0 +1,338 @@
+/*
+ Copyright (C) 2017 Alexey Kozlov, Diego Darriba, Tomas Flouri
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as
+ published by the Free Software Foundation, either version 3 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see .
+
+ Contact: Alexey Kozlov ,
+ Exelixis Lab, Heidelberg Instutute for Theoretical Studies
+ Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
+*/
+#include "common.h"
+
+#define N_STATES_NT 4
+#define N_STATES_AA 4
+#define N_CAT_GAMMA 4
+#define N_SITES 5
+#define FLOAT_PRECISION 4
+
+#define TREEFILE "testdata/2000.tree"
+
+#define MIN(a,b) (ascale_buffers > 0 && clv_idx >= p->tips) ?
+ clv_idx - p->tips : PLL_SCALE_BUFFER_NONE;
+}
+
+void show_scaler(const pll_partition_t * p, unsigned int clv_idx)
+{
+ unsigned int scaler = scaler_idx(p, clv_idx);
+ if (scaler != PLL_SCALE_BUFFER_NONE)
+ {
+ unsigned int i, j;
+ printf("scaler %u: [ ", scaler);
+ for (i = 0; i < p->sites; ++i)
+ {
+ if (p->attributes & PLL_ATTRIB_RATE_SCALERS)
+ {
+ unsigned int * scalev = p->scale_buffer[scaler] + i*p->rate_cats;
+ unsigned int min_scaler = 1e6;
+ for (j = 0; j < p->rate_cats; ++j)
+ if (scalev[j] < min_scaler)
+ min_scaler = scalev[j];
+
+ printf("%d+(", min_scaler);
+ for (j = 0; j < p->rate_cats; ++j)
+ {
+ unsigned int s = scalev[j] - min_scaler;
+ printf("%u", MIN(50, s));
+ if (j < p->rate_cats-1)
+ printf(" ");
+ }
+ printf(") ");
+ }
+ else
+ printf("%u ", p->scale_buffer[scaler][i]);
+ }
+ printf("]\n");
+ }
+}
+
+void show_clv(const pll_partition_t * p, unsigned int clv_idx, unsigned int site)
+{
+ unsigned int i;
+ unsigned int clv_span = p->states * p->rate_cats;
+ printf("CLV %u site %u (size=%u): [ ", clv_idx, site, clv_span);
+ for (i = (site-1)*clv_span; i < site*clv_span; ++i)
+ printf("%e ", p->clv[clv_idx][i]);
+ printf("]\n");
+
+}
+
+pll_partition_t * init_partition(unsigned int attrs)
+{
+ unsigned int i,j;
+ pll_partition_t * p = pll_partition_create(tree->tip_count,
+ tree->inner_count,
+ N_STATES_NT,
+ N_SITES,
+ 1, /* rate matrices */
+ 2*tree->tip_count - 3,
+ N_CAT_GAMMA,
+ tree->inner_count,
+ attrs);
+
+ if (!p)
+ fatal("ERROR creating partition: %s\n", pll_errmsg);
+
+ size_t len = strlen(nt_alphabet);
+ char * seq = (char *) calloc(N_SITES+1, sizeof(char));
+ for (i = 0; i < tree->tip_count; ++i)
+ {
+ for (j = 0; j < N_SITES; ++j)
+ {
+ seq[j] = (i < 1500) ? nt_alphabet[j % len] : nt_alphabet[(i + j) % len];
+ }
+
+ pll_set_tip_states(p, tree->nodes[i]->clv_index, pll_map_nt, seq);
+ }
+
+ free(seq);
+
+ pll_set_frequencies(p, 0, frequencies);
+ pll_set_subst_params(p, 0, subst_params);
+
+ return p;
+}
+
+void init(unsigned int attrs)
+{
+ unsigned int i;
+
+ tree = pll_utree_parse_newick(TREEFILE);
+
+ if (!tree)
+ fatal("ERROR reading tree file: %s\n", pll_errmsg);
+
+ part_sitescale = init_partition(attrs);
+ part_ratescale = init_partition(attrs | PLL_ATTRIB_RATE_SCALERS);
+
+ /* build fixed structures */
+ unsigned int nodes_count = tree->inner_count + tree->tip_count;
+ unsigned int branch_count = nodes_count - 1;
+ travbuffer = (pll_unode_t **)malloc(nodes_count * sizeof(pll_unode_t *));
+ branch_lengths = (double *)malloc(branch_count * sizeof(double));
+ matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int));
+ operations = (pll_operation_t *)malloc(tree->inner_count *
+ sizeof(pll_operation_t));
+ persite_lnl = (double *)malloc(part_sitescale->sites * sizeof(double));
+ sumtable = pll_aligned_alloc(part_sitescale->sites *
+ part_sitescale->rate_cats *
+ part_sitescale->states_padded * sizeof(double),
+ part_sitescale->alignment);
+
+ root = tree->nodes[tree->tip_count+tree->inner_count-1];
+
+ /* get full traversal */
+ pll_utree_traverse(root,
+ PLL_TREE_TRAVERSE_POSTORDER,
+ cb_full_traversal,
+ travbuffer,
+ &traversal_size);
+
+ pll_utree_create_operations(travbuffer,
+ traversal_size,
+ branch_lengths,
+ matrix_indices,
+ operations,
+ &matrix_count,
+ &ops_count);
+
+ for (i = 0; i < matrix_count; ++i)
+ branch_lengths[i] = (i % 2 == 0) ? 1.0 : 1e-6;
+}
+
+void comp_derivatives(pll_partition_t * partition, pll_unode_t * r,
+ double brlen, double * d1, double * d2)
+{
+ if (!pll_update_sumtable(partition, r->clv_index, r->back->clv_index,
+ r->scaler_index, r->back->scaler_index,
+ params_indices, sumtable))
+ {
+ fatal("ERROR computing sumtable: %s\n", pll_errmsg);
+ }
+
+ if (!pll_compute_likelihood_derivatives(partition,
+ r->scaler_index,
+ r->back->scaler_index,
+ brlen,
+ params_indices,
+ sumtable,
+ d1, d2))
+ {
+ fatal("ERROR computing derivatives: %s\n", pll_errmsg);
+ }
+}
+
+int eval(pll_partition_t * partition, double alpha)
+{
+ double rate_cats[N_CAT_GAMMA];
+ unsigned int i;
+ double d_f, dd_f;
+
+ pll_compute_gamma_cats(alpha, n_cat_gamma, rate_cats);
+ pll_set_category_rates(partition, rate_cats);
+
+ printf("scaling = ");
+ if (partition->attributes & PLL_ATTRIB_RATE_SCALERS)
+ printf("per-rate");
+ else if (partition->scale_buffers > 0)
+ printf("per-site");
+ else
+ printf("OFF");
+
+ printf(", alpha = %lf, rates = [ ", alpha);
+ for (i = 0; i < n_cat_gamma; ++i)
+ printf("%lf ", rate_cats[i]);
+ printf("]\n");
+
+ printf("recompute P-matrices: %d\n", matrix_count);
+ pll_update_prob_matrices(partition,
+ params_indices,
+ matrix_indices,
+ branch_lengths,
+ matrix_count);
+
+ printf("recompute CLVs: %d\n", ops_count);
+ pll_update_partials(partition, operations, ops_count);
+
+ show_scaler(partition, root->back->clv_index);
+// show_clv(partition, root->back->clv_index, 53);
+// pll_show_pmatrix(partition, root->pmatrix_index, 9);
+// pll_show_pmatrix(partition, root->pmatrix_index-1, 9);
+
+ // test derivatives
+ comp_derivatives(partition, root, 1.0, &d_f, &dd_f);
+
+ double ii_loglh = pll_compute_edge_loglikelihood(partition,
+ root->clv_index,
+ root->scaler_index,
+ root->back->clv_index,
+ root->back->scaler_index,
+ root->pmatrix_index,
+ params_indices,
+ persite_lnl);
+
+ printf("per-site logLH: [ ");
+ for (i = 0; i < partition->sites; ++i)
+ printf("%.4lf ", persite_lnl[i]);
+ printf("]\n");
+ printf("logLH INNER-INNER at edge (%u-%u): %.4lf, LH derivatives: %.4lf, %.4lf\n",
+ root->clv_index, root->back->clv_index, ii_loglh,
+ d_f, dd_f);
+
+ pll_unode_t * new_root = root->next->next;
+ pll_unode_t * tip = new_root->back;
+ assert(tip->clv_index < tree->tip_count);
+
+ // change root CLV orientation
+ pll_operation_t op;
+ op.parent_clv_index = new_root->clv_index;
+ op.child1_clv_index = new_root->next->back->clv_index;
+ op.child2_clv_index = new_root->next->next->back->clv_index;
+ op.child1_matrix_index = new_root->next->pmatrix_index;
+ op.child2_matrix_index = new_root->next->next->pmatrix_index;
+ op.parent_scaler_index = scaler_idx(partition, op.parent_clv_index);
+ op.child1_scaler_index = scaler_idx(partition, op.child1_clv_index);
+ op.child2_scaler_index = scaler_idx(partition, op.child2_clv_index);
+
+ pll_update_partials(partition, &op, 1);
+
+ // test derivatives
+ comp_derivatives(partition, new_root, 1.0, &d_f, &dd_f);
+
+ double ti_loglh = pll_compute_edge_loglikelihood(partition,
+ new_root->clv_index,
+ new_root->scaler_index,
+ tip->clv_index,
+ tip->scaler_index,
+ tip->pmatrix_index,
+ params_indices,
+ persite_lnl);
+
+ printf("logLH TIP-INNER at edge (%u-%u): %.4lf, LH derivatives: %.4lf, %.4lf\n\n",
+ new_root->clv_index, new_root->back->clv_index, ti_loglh,
+ d_f, dd_f);
+
+ if (fabs(ii_loglh - ti_loglh) > 1e-4)
+ {
+ printf("ERROR: Likelihood mismatch INNER-INNER vs TIP-INNER (see above)\n");
+ }
+
+ return 1;
+}
+
+void cleanup()
+{
+ free(travbuffer);
+ free(branch_lengths);
+ free(operations);
+ free(matrix_indices);
+ free(persite_lnl);
+ free(sumtable);
+ pll_partition_destroy(part_noscale);
+ pll_partition_destroy(part_sitescale);
+ pll_partition_destroy(part_ratescale);
+ pll_utree_destroy(tree, NULL);
+}
+
+
+int main(int argc, char * argv[])
+{
+ /* check attributes */
+ unsigned int attributes = get_attributes(argc, argv);
+
+ init(attributes);
+
+ unsigned int i;
+ for (i = 0; i < alpha_count; ++i)
+ {
+ eval(part_sitescale, alphas[i]);
+ eval(part_ratescale, alphas[i]);
+ }
+
+ cleanup();
+
+ return (0);
+}