diff --git a/src/cfunctions.cpp b/src/cfunctions.cpp
index a26ca98..11b5453 100644
--- a/src/cfunctions.cpp
+++ b/src/cfunctions.cpp
@@ -473,11 +473,11 @@ NumericVector cloglik(List dataset, List pset, List control,
   // we could do array reduction on the gradient, but it's not supported in
   // the current C-compiler for windows on cran. So do it manually.
   static double *lh, *totgrad;
-  static double *spellgrad, *llspell, *dllspell; 
+  static double *spellgrad, *llspell, *dllspell, *gneuc; 
   int *nonzero; // only used temporarily in critical section, so not thread local
   if(dofisher) nonzero = (int*) R_alloc(totalpars, sizeof(int)); 
 
-#pragma omp threadprivate(llspell,dllspell, lh, spellgrad, totgrad)
+#pragma omp threadprivate(llspell,dllspell, lh, spellgrad, totgrad, gneuc)
 #pragma omp parallel num_threads(nthreads)
   {  
     // Can't do R_alloc in threads
@@ -488,6 +488,7 @@ NumericVector cloglik(List dataset, List pset, List control,
     if(dograd) {
       dllspell = new double[npoints*npars];
       spellgrad = new double[totalpars];
+      gneuc = new double[totalpars]();
     }
   }
 
@@ -544,14 +545,21 @@ NumericVector cloglik(List dataset, List pset, List control,
       // compute the log likelihood
       ll = logsumofexp(npoints,llspell,logprobs);
       // for many spells we should perhaps do a compensated addition, Kahan/Neumaier?
-      LL += ll;
+      const double t = LL+ll;
+      neuc += (abs(LL) > abs(ll)) ? (LL-t)+ll : (ll-t)+LL;
+      LL = t;
       
       if(dograd) {
 	// compute the spell gradient, update the gradient
 	updategradient(npoints, dllspell, llspell, logprobs, ll,
 		       transitions, npars, nvars, faclevels, pargs, totalpars, spellgrad, onlyprobs);
 	// update global gradient with spell gradient
-	for(int k = 0; k < totalpars; k++) totgrad[k] += spellgrad[k];
+	for(int k = 0; k < totalpars; k++) {
+	  const double t = totgrad[k]+spellgrad[k];
+	  gneuc[k] += (abs(totgrad[k]) > abs(spellgrad[k])) ? (totgrad[k]-t)+spellgrad[k] :
+	    (spellgrad[k] - t) + totgrad[k];
+	  totgrad[k] = t;
+	}
 	if(dofisher) {
 	  // update the fisher matrix from the spellgrad
 	  // we use a global fisher matrix, no omp reduction, so do it in a critical section
@@ -563,15 +571,17 @@ NumericVector cloglik(List dataset, List pset, List control,
   }  // end of spell loop
 
 
-  if(gdiff) LL += neuc;
+  LL += neuc;
   // Deallocate thread private storage
 #pragma omp parallel num_threads(nthreads)
   {  
     delete [] llspell;
     delete [] lh;
     if(dograd) {
+      for(int k = 0; k < gradsize; k++) {totgrad[k] += gneuc[k];}
       delete [] dllspell;
       delete [] spellgrad;
+      delete [] gneuc;
     }
   }
 
@@ -584,12 +594,20 @@ NumericVector cloglik(List dataset, List pset, List control,
     // Reduce the totgrad in a critical section
     NumericVector retgrad(totalpars);
     double *grad = REAL(retgrad);
+    double *neu = new double[gradsize]();
 #pragma omp parallel num_threads(nthreads) 
     {
 #pragma omp critical
-      for(int k = 0; k < gradsize; k++) grad[k] += totgrad[k];
+      for(int k = 0; k < gradsize; k++) {
+	const double t = grad[k] + totgrad[k];
+	neu[k] += (abs(grad[k]) > abs(totgrad[k])) ? (grad[k] - t) + totgrad[k] :
+	  (totgrad[k]-t) + grad[k];
+	grad[k] = t;
+      }
+      delete [] totgrad;
     }
-    if(dograd) {delete [] totgrad;}
+    for(int k = 0; k < gradsize; k++) grad[k] += neu[k];
+    delete [] neu;
     ret.attr("gradient") = retgrad;
   }