diff --git a/Makefile.am b/Makefile.am
index 8d944d7b..c3a999d2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -85,6 +85,7 @@ cpuminer_SOURCES = \
   algo/groestl/aes_ni/hash-groestl.c \
   algo/groestl/aes_ni/hash-groestl256.c \
   algo/fugue/sph_fugue.c \
+  algo/fugue/fugue-aesni.c \
   algo/hamsi/sph_hamsi.c \
   algo/hamsi/hamsi-hash-4way.c \
   algo/haval/haval.c \
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 1fd50fa7..4c6e60f7 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -44,7 +44,7 @@ Please include the following information:
 1. CPU model, operating system, cpuminer-opt version (must be latest),
    binary file for Windows, changes to default build procedure for Linux.
 
-2. Exact comand line (except user and pw) and intial output showing
+2. Exact command line (except user and pw) and initial output showing
    the above requested info.
 
 3. Additional program output showing any error messages or other
@@ -65,6 +65,12 @@ If not what makes it happen or not happen?
 Change Log
 ----------
 
+v3.15.0
+
+Fugue optimized with AES, improves many sha3 algos.
+Minotaur algo optimized for all architectures.
+Fixed neoscrypt BUG log.
+ 
 v3.14.3
 
 #265: more mutex changes to reduce blocking with high thread count.
diff --git a/algo/fugue/fugue-aesni.c b/algo/fugue/fugue-aesni.c
new file mode 100644
index 00000000..dde1b21c
--- /dev/null
+++ b/algo/fugue/fugue-aesni.c
@@ -0,0 +1,567 @@
+/*
+ * file        : fugue_vperm.c
+ * version     : 1.0.208
+ * date        : 14.12.2010
+ * 
+ * - vperm and aes_ni implementations of hash function Fugue
+ * - implements NIST hash api
+ * - assumes that message length is a multiple of 8 bits
+ * - _FUGUE_VPERM_ must be defined if compiling with ../main.c
+ * - default version is vperm, define AES_NI for aes_ni version
+ * 
+ * Cagdas Calik
+ * ccalik@metu.edu.tr
+ * Institute of Applied Mathematics, Middle East Technical University, Turkey.
+ *
+ */
+
+#if defined(__AES__)
+
+#include <x86intrin.h>
+
+#include <memory.h>
+#include "fugue-aesni.h"
+
+// Constant tables read through M128() by the macros below: _supermix*, _maskd3n, _inv_shift_rows, _zero, _mul2mask, _mul4mask, _lsbmask2.
+MYALIGN const unsigned long long _supermix1a[]	= {0x0202010807020100, 0x0a05000f06010c0b};
+MYALIGN const unsigned long long _supermix1b[]	= {0x0b0d080703060504, 0x0e0a090c050e0f0a};
+MYALIGN const unsigned long long _supermix1c[]	= {0x0402060c070d0003, 0x090a060580808080};
+MYALIGN const unsigned long long _supermix1d[]	= {0x808080800f0e0d0c, 0x0f0e0d0c80808080};
+MYALIGN const unsigned long long _supermix2a[]	= {0x07020d0880808080, 0x0b06010c050e0f0a};
+MYALIGN const unsigned long long _supermix4a[]	= {0x000f0a050c0b0601, 0x0302020404030e09};
+MYALIGN const unsigned long long _supermix4b[]	= {0x07020d08080e0d0d, 0x07070908050e0f0a};
+MYALIGN const unsigned long long _supermix4c[]	= {0x0706050403020000, 0x0302000007060504};
+MYALIGN const unsigned long long _supermix7a[]	= {0x010c0b060d080702, 0x0904030e03000104};
+MYALIGN const unsigned long long _supermix7b[]	= {0x8080808080808080, 0x0504070605040f06};
+MYALIGN const unsigned long long _k_n[] = {0x4E4E4E4E4E4E4E4E, 0x1B1B1B1B0E0E0E0E};
+MYALIGN const unsigned int _maskd3n[] = {0xffffffff, 0xffffffff, 0xffffffff, 0x00000000}; // keeps lanes 0..2, clears lane 3
+MYALIGN const unsigned char _shift_one_mask[]   = {7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14, 3, 0, 1, 2};
+MYALIGN const unsigned char _shift_four_mask[]  = {13, 14, 15, 12, 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8};
+MYALIGN const unsigned char _shift_seven_mask[] = {10, 11, 8, 9, 14, 15, 12, 13, 2, 3, 0, 1, 6, 7, 4, 5};
+MYALIGN const unsigned char _aes_shift_rows[]   = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11};
+MYALIGN const unsigned int _inv_shift_rows[] = {0x070a0d00, 0x0b0e0104, 0x0f020508, 0x0306090c};
+MYALIGN const unsigned int _zero[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; // all-zero round key for SUBSTITUTE
+MYALIGN const unsigned int _mul2mask[] = {0x1b1b0000, 0x00000000, 0x00000000, 0x00000000}; // 4-entry byte table indexed in PRESUPERMIX
+MYALIGN const unsigned int _mul4mask[] = {0x2d361b00, 0x00000000, 0x00000000, 0x00000000}; // 4-entry byte table indexed in PRESUPERMIX
+MYALIGN const unsigned int _lsbmask2[] = {0x03030303, 0x03030303, 0x03030303, 0x03030303}; // low two bits of every byte
+// NOTE(review): _k_n, _shift_*_mask and _aes_shift_rows are not referenced in the visible part of this file.
+// _IV512: six rows of four words; fugue512_Init loads row n into state[6 + n].
+MYALIGN const unsigned int _IV512[] = {		
+	0x00000000, 0x00000000,	0x7ea50788, 0x00000000,
+	0x75af16e6, 0xdbe4d3c5, 0x27b09aac, 0x00000000,
+	0x17f115d9, 0x54cceeb6, 0x0b02e806, 0x00000000,
+	0xd1ef924a, 0xc9e2c6aa, 0x9813b2dd, 0x00000000,
+	0x3858e6ca, 0x3f207f43, 0xe778ea25, 0x00000000,
+	0xd6dd1f95, 0x1dd16eda, 0x67353ee1, 0x00000000};
+
+#if defined(__SSE4_1__)
+// SSE4.1 PACK_S0: copies lane 0 of s1 into lane 3 of s0 (t1 is unused).
+#define PACK_S0(s0, s1, t1)\
+   s0 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s0), _mm_castsi128_ps(s1), 0x30))
+// SSE4.1 UNPACK_S0: copies lane 3 of s0 into lane 0 of s1, then clears lane 3 of s0 (t1 is unused).
+#define UNPACK_S0(s0, s1, t1)\
+   s1 = _mm_castps_si128(_mm_insert_ps(_mm_castsi128_ps(s1), _mm_castsi128_ps(s0), 0xc0));\
+   s0 = _mm_and_si128(s0, M128(_maskd3n))
+// SSE4.1 CMIX: XORs one lane mix of s1/s2 into both r1 and r2 (t2 is unused). Expands with a trailing ';'.
+#define CMIX(s1, s2, r1, r2, t1, t2)\
+   t1 = s1;\
+   t1 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(t1), _mm_castsi128_ps(s2), _MM_SHUFFLE(3, 0, 2, 1)));\
+   r1 = _mm_xor_si128(r1, t1);\
+   r2 = _mm_xor_si128(r2, t1);
+
+#else   // SSE2
+// SSE2 PACK_S0 XORs a shuffle of s1 into s0 instead of inserting. NOTE(review): looks like it relies on some lanes being zero - confirm. Expands with a trailing ';'.
+#define PACK_S0(s0, s1, t1)\
+   t1 = _mm_shuffle_epi32(s1, _MM_SHUFFLE(0, 3, 3, 3));\
+   s0 = _mm_xor_si128(s0, t1);
+// SSE2 UNPACK_S0: broadcasts lane 3 of s0 into t1, moves it into lane 0 of s1, then clears lane 3 of s0.
+#define UNPACK_S0(s0, s1, t1)\
+   t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 3, 3));\
+   s1 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s1), _mm_castsi128_ps(t1)));\
+   s0 = _mm_and_si128(s0, M128(_maskd3n))
+// SSE2 CMIX: XORs (shuffle of s1) ^ (shuffle of s2) into both r1 and r2; uses both t1 and t2.
+#define CMIX(s1, s2, r1, r2, t1, t2)\
+   t1 = _mm_shuffle_epi32(s1, 0xf9);\
+   t2 = _mm_shuffle_epi32(s2, 0xcf);\
+   t1 = _mm_xor_si128(t1, t2);\
+   r1 = _mm_xor_si128(r1, t1);\
+   r2 = _mm_xor_si128(r2, t1)
+
+#endif
+// TIX*: load one 32-bit word from msg (_mm_load_ss) into lane 0 of s0, XOR it (shifted up 8 bytes) into s8, and XOR lane shuffles between the listed registers. t2/t3 are unused.
+#define TIX256(msg, s10, s8, s24, s0, t1, t2, t3)\
+	t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
+	s10 = _mm_xor_si128(s10, t1);\
+	t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
+	s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
+	t1 = _mm_slli_si128(t1, 8);\
+	s8 = _mm_xor_si128(s8, t1);\
+	t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\
+	s0 = _mm_xor_si128(s0, t1)
+
+// TIX384: same steps as TIX256 with s16/s27 in place of s10/s24, plus s30 mixed into s4.
+#define TIX384(msg, s16, s8, s27, s30, s0, s4, t1, t2, t3)\
+	t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
+	s16 = _mm_xor_si128(s16, t1);\
+	t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
+	s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
+	t1 = _mm_slli_si128(t1, 8);\
+	s8 = _mm_xor_si128(s8, t1);\
+	t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\
+	s0 = _mm_xor_si128(s0, t1);\
+	t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\
+	s4 = _mm_xor_si128(s4, t1)
+// TIX512: the variant Compress512 uses; mixes s24 into s0, s27 into s4 and s30 into s7.
+#define TIX512(msg, s22, s8, s24, s27, s30, s0, s4, s7, t1, t2, t3)\
+	t1 = _mm_shuffle_epi32(s0, _MM_SHUFFLE(3, 3, 0, 3));\
+	s22 = _mm_xor_si128(s22, t1);\
+	t1 = _mm_castps_si128(_mm_load_ss((float*)msg));\
+	s0 = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(s0), _mm_castsi128_ps(t1)));\
+	t1 = _mm_slli_si128(t1, 8);\
+	s8 = _mm_xor_si128(s8, t1);\
+	t1 = _mm_shuffle_epi32(s24, _MM_SHUFFLE(3, 3, 0, 3));\
+	s0 = _mm_xor_si128(s0, t1);\
+	t1 = _mm_shuffle_epi32(s27, _MM_SHUFFLE(3, 3, 0, 3));\
+	s4 = _mm_xor_si128(s4, t1);\
+	t1 = _mm_shuffle_epi32(s30, _MM_SHUFFLE(3, 3, 0, 3));\
+	s7 = _mm_xor_si128(s7, t1)
+
+// PRESUPERMIX: s1 = x, s2 = byte-wise 2*x, x = byte-wise 4*x; the top two bits of each byte index _mul2mask/_mul4mask for the correction XOR (presumably GF(2^8) reduction - confirm).
+#define PRESUPERMIX(x, t1, s1, s2, t2)\
+	s1 = x;\
+	s2 = _mm_add_epi8(x, x);\
+	t2 = _mm_add_epi8(s2, s2);\
+	t1 = _mm_srli_epi16(x, 6);\
+	t1 = _mm_and_si128(t1, M128(_lsbmask2));\
+	s2 = _mm_xor_si128(s2, _mm_shuffle_epi8(M128(_mul2mask), t1));\
+	x  = _mm_xor_si128(t2, _mm_shuffle_epi8(M128(_mul4mask), t1))
+// SUBSTITUTE: shuffles r0 by _inv_shift_rows, then AESENCLAST with a zero key; result lands in _t2. _t1, _t3 and _t0 are unused.
+#define SUBSTITUTE(r0, _t1, _t2, _t3, _t0)\
+	_t2 = _mm_shuffle_epi8(r0, M128(_inv_shift_rows));\
+	_t2 = _mm_aesenclast_si128(_t2, M128(_zero))
+// SUPERMIX: PRESUPERMIX followed by POSTSUPERMIX (defined just below); the result ends up in t4.
+#define SUPERMIX(t0, t1, t2, t3, t4)\
+	PRESUPERMIX(t0, t1, t2, t3, t4);\
+	POSTSUPERMIX(t0, t1, t2, t3, t4)
+
+// POSTSUPERMIX: accumulates _supermix*-shuffled copies of t0, t2 and t3 into t4; t0..t3 are all clobbered.
+#define POSTSUPERMIX(t0, t1, t2, t3, t4)\
+	t1 = t2;\
+	t1 = _mm_shuffle_epi8(t1, M128(_supermix1b));\
+	t4 = t1;\
+	t1 = _mm_shuffle_epi8(t1, M128(_supermix1c));\
+	t4 = _mm_xor_si128(t4, t1);\
+	t1 = t4;\
+	t1 = _mm_shuffle_epi8(t1, M128(_supermix1d));\
+	t4 = _mm_xor_si128(t4, t1);\
+	t1 = t2;\
+	t1 = _mm_shuffle_epi8(t1, M128(_supermix1a));\
+	t4 = _mm_xor_si128(t4, t1);\
+	t2 = _mm_xor_si128(t2, t3);\
+	t2 = _mm_xor_si128(t2, t0);\
+	t2 = _mm_shuffle_epi8(t2, M128(_supermix7a));\
+	t4 = _mm_xor_si128(t4, t2);\
+	t2 = _mm_shuffle_epi8(t2, M128(_supermix7b));\
+	t4 = _mm_xor_si128(t4, t2);\
+	t3 = _mm_shuffle_epi8(t3, M128(_supermix2a));\
+	t1 = t0;\
+	t1 = _mm_shuffle_epi8(t1, M128(_supermix4a));\
+	t4 = _mm_xor_si128(t4, t1);\
+	t0 = _mm_shuffle_epi8(t0, M128(_supermix4b));\
+	t0 = _mm_xor_si128(t0, t3);\
+	t4 = _mm_xor_si128(t4, t0);\
+	t0 = _mm_shuffle_epi8(t0, M128(_supermix4c));\
+	t4 = _mm_xor_si128(t4, t0)
+
+// SUBROUND512_3: one CMIX, then three SUBSTITUTE + SUPERMIX stages chained r1c -> r2c/r2d -> r3c/r3d. Uses _t0.._t3 from the caller's scope; r2b and r3b are unused.
+#define SUBROUND512_3(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d)\
+	CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\
+	PACK_S0(r1c, r1a, _t0);\
+	SUBSTITUTE(r1c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
+	_t0 = _mm_shuffle_epi32(r1c, 0x39);\
+	r2c = _mm_xor_si128(r2c, _t0);\
+	_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
+	r2d = _mm_xor_si128(r2d, _t0);\
+	UNPACK_S0(r1c, r1a, _t3);\
+	SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
+	_t0 = _mm_shuffle_epi32(r2c, 0x39);\
+	r3c = _mm_xor_si128(r3c, _t0);\
+	_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
+	r3d = _mm_xor_si128(r3d, _t0);\
+	UNPACK_S0(r2c, r2a, _t3);\
+	SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
+	UNPACK_S0(r3c, r3a, _t3)
+
+// SUBROUND512_4: as SUBROUND512_3 with a fourth stage on r4c/r4d. Uses _t0.._t3 from the caller's scope; r2b, r3b and r4b are unused.
+#define SUBROUND512_4(r1a, r1b, r1c, r1d, r2a, r2b, r2c, r2d, r3a, r3b, r3c, r3d, r4a, r4b, r4c, r4d)\
+	CMIX(r1a, r1b, r1c, r1d, _t0, _t1);\
+	PACK_S0(r1c, r1a, _t0);\
+	SUBSTITUTE(r1c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r1c);\
+	_t0 = _mm_shuffle_epi32(r1c, 0x39);\
+	r2c = _mm_xor_si128(r2c, _t0);\
+	_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
+	r2d = _mm_xor_si128(r2d, _t0);\
+	UNPACK_S0(r1c, r1a, _t3);\
+	SUBSTITUTE(r2c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r2c);\
+	_t0 = _mm_shuffle_epi32(r2c, 0x39);\
+	r3c = _mm_xor_si128(r3c, _t0);\
+	_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
+	r3d = _mm_xor_si128(r3d, _t0);\
+	UNPACK_S0(r2c, r2a, _t3);\
+	SUBSTITUTE(r3c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r3c);\
+	_t0 = _mm_shuffle_epi32(r3c, 0x39);\
+	r4c = _mm_xor_si128(r4c, _t0);\
+	_t0 = _mm_and_si128(_t0, M128(_maskd3n));\
+	r4d = _mm_xor_si128(r4d, _t0);\
+	UNPACK_S0(r3c, r3a, _t3);\
+	SUBSTITUTE(r4c, _t1, _t2, _t3, _t0);\
+	SUPERMIX(_t2, _t3, _t0, _t1, r4c);\
+	UNPACK_S0(r4c, r4a, _t3)
+
+
+// LOADCOLUMN: gathers col[(base + a + k) % s], k = 0..3, into block and loads it into x. Needs block, col and base in the caller's scope.
+#define LOADCOLUMN(x, s, a)\
+	block[0] = col[(base + a + 0) % s];\
+	block[1] = col[(base + a + 1) % s];\
+	block[2] = col[(base + a + 2) % s];\
+	block[3] = col[(base + a + 3) % s];\
+	x = _mm_load_si128((__m128i*)block)
+// STORECOLUMN: stores x to block, then scatters it to col[(base + k) % s], k = 0..3. Needs block, col and base in the caller's scope.
+#define STORECOLUMN(x, s)\
+	_mm_store_si128((__m128i*)block, x);\
+	col[(base + 0) % s] = block[0];\
+	col[(base + 1) % s] = block[1];\
+	col[(base + 2) % s] = block[2];\
+	col[(base + 3) % s] = block[3]
+
+void Compress512(hashState_fugue *ctx, const unsigned char *pmsg, unsigned int uBlockCount)
+{
+   // Absorb uBlockCount 4-byte message words from pmsg into ctx->state; each
+   // word costs one TIX512 followed by one SUBROUND512_4.
+   // The 12-register state is never physically rotated. Every step instead
+   // uses the previous step's register indices minus 4 (mod 12), and
+   // ctx->base (0, 1 or 2) records which of the three index sets comes next.
+   // _t0.._t3 are scratch registers that SUBROUND512_4 references by name.
+   __m128i _t0, _t1, _t2, _t3;
+
+   // Nothing to absorb; also keeps the unsigned counter from wrapping below.
+   if( uBlockCount == 0 ) return;
+
+   // Finish a 3-step cycle that an earlier call left incomplete.
+   switch( ctx->base )
+   {
+      case 1:
+         TIX512( pmsg, ctx->state[3], ctx->state[10], ctx->state[4], ctx->state[5],
+                       ctx->state[6], ctx->state[8],  ctx->state[9], ctx->state[10],
+                 _t0, _t1, _t2 );
+         SUBROUND512_4( ctx->state[8], ctx->state[9], ctx->state[7], ctx->state[1],
+                        ctx->state[7], ctx->state[8], ctx->state[6], ctx->state[0],
+                        ctx->state[6], ctx->state[7], ctx->state[5], ctx->state[11],
+                        ctx->state[5], ctx->state[6], ctx->state[4], ctx->state[10] );
+         ctx->base++;
+         pmsg += 4;
+         uBlockCount--;
+         if( uBlockCount == 0 ) break;
+         /* fallthrough */
+      case 2:
+         TIX512( pmsg, ctx->state[11], ctx->state[6], ctx->state[0], ctx->state[1],
+                       ctx->state[2],  ctx->state[4], ctx->state[5], ctx->state[6],
+                 _t0, _t1, _t2 );
+         SUBROUND512_4( ctx->state[4], ctx->state[5], ctx->state[3], ctx->state[9],
+                        ctx->state[3], ctx->state[4], ctx->state[2], ctx->state[8],
+                        ctx->state[2], ctx->state[3], ctx->state[1], ctx->state[7],
+                        ctx->state[1], ctx->state[2], ctx->state[0], ctx->state[6] );
+         ctx->base = 0;
+         pmsg += 4;
+         uBlockCount--;
+         break;
+   }
+
+   // Main loop: three steps per pass, leaving early when the input runs out.
+   while( uBlockCount > 0 )
+   {
+      // Step for base 0.
+      TIX512( pmsg, ctx->state[7],  ctx->state[2], ctx->state[8], ctx->state[9],
+                    ctx->state[10], ctx->state[0], ctx->state[1], ctx->state[2],
+              _t0, _t1, _t2 );
+      SUBROUND512_4( ctx->state[0],  ctx->state[1],  ctx->state[11], ctx->state[5],
+                     ctx->state[11], ctx->state[0],  ctx->state[10], ctx->state[4],
+                     ctx->state[10], ctx->state[11], ctx->state[9],  ctx->state[3],
+                     ctx->state[9],  ctx->state[10], ctx->state[8],  ctx->state[2] );
+      ctx->base++;
+      pmsg += 4;
+      uBlockCount--;
+      if( uBlockCount == 0 ) break;
+
+      // Step for base 1 (same register sets as case 1 above).
+      TIX512( pmsg, ctx->state[3], ctx->state[10], ctx->state[4], ctx->state[5],
+                    ctx->state[6], ctx->state[8],  ctx->state[9], ctx->state[10],
+              _t0, _t1, _t2 );
+      SUBROUND512_4( ctx->state[8], ctx->state[9], ctx->state[7], ctx->state[1],
+                     ctx->state[7], ctx->state[8], ctx->state[6], ctx->state[0],
+                     ctx->state[6], ctx->state[7], ctx->state[5], ctx->state[11],
+                     ctx->state[5], ctx->state[6], ctx->state[4], ctx->state[10] );
+      ctx->base++;
+      pmsg += 4;
+      uBlockCount--;
+      if( uBlockCount == 0 ) break;
+
+      // Step for base 2 (same register sets as case 2 above); wraps base to 0.
+      TIX512( pmsg, ctx->state[11], ctx->state[6], ctx->state[0], ctx->state[1],
+                    ctx->state[2],  ctx->state[4], ctx->state[5], ctx->state[6],
+              _t0, _t1, _t2 );
+      SUBROUND512_4( ctx->state[4], ctx->state[5], ctx->state[3], ctx->state[9],
+                     ctx->state[3], ctx->state[4], ctx->state[2], ctx->state[8],
+                     ctx->state[2], ctx->state[3], ctx->state[1], ctx->state[7],
+                     ctx->state[1], ctx->state[2], ctx->state[0], ctx->state[6] );
+      ctx->base = 0;
+      pmsg += 4;
+      uBlockCount--;
+   }
+}
+// Final512: run the closing rounds on ctx->state and write the 64-byte digest to hashval.
+void Final512(hashState_fugue *ctx, BitSequence *hashval)
+{
+        unsigned int block[4] __attribute__ ((aligned (32)));   // staging area used by LOADCOLUMN / STORECOLUMN
+        unsigned int col[36] __attribute__ ((aligned (16)));    // state as 36 32-bit columns (12 registers x 3 lanes)
+	unsigned int i, base;                                   // base: current rotation offset into col
+	__m128i r0, _t0, _t1, _t2, _t3;                         // _t0.._t3: scratch handed to SUBSTITUTE / SUPERMIX
+	// Unpack lanes 0..2 of every state register into col; lane 3 is not copied.
+	for(i = 0; i < 12; i++)
+	{
+		_mm_store_si128((__m128i*)block, ctx->state[i]);
+
+		col[3 * i + 0] = block[0];
+		col[3 * i + 1] = block[1];
+		col[3 * i + 2] = block[2];
+	}
+
+	base = (36 - (12 * ctx->base)) % 36;   // starting offset: 12 columns per ctx->base step
+	// 32 rounds of: rotate by 3 columns, CMIX, then SMIX on the four columns at base.
+	for(i = 0; i < 32; i++)
+	{
+		// ROR3: +33 mod 36 moves base back by 3 columns
+		base = (base + 33) % 36;
+
+		// CMIX
+		col[(base +  0) % 36] ^= col[(base + 4) % 36];
+		col[(base +  1) % 36] ^= col[(base + 5) % 36];
+		col[(base +  2) % 36] ^= col[(base + 6) % 36];
+		col[(base +  18) % 36] ^= col[(base + 4) % 36];
+		col[(base +  19) % 36] ^= col[(base + 5) % 36];
+		col[(base +  20) % 36] ^= col[(base + 6) % 36];
+
+		// SMIX: load four columns, SUBSTITUTE + SUPERMIX, store them back
+		LOADCOLUMN(r0, 36, 0);
+		SUBSTITUTE(r0, _t1, _t2, _t3, _t0);
+		SUPERMIX(_t2, _t3, _t0, _t1, r0);
+		STORECOLUMN(r0, 36);
+	}
+	// 13 rounds, each made of four XOR / rotate / SMIX sub-steps.
+	for(i = 0; i < 13; i++)
+	{
+		// S4 += S0; S9 += S0; S18 += S0; S27 += S0;
+		col[(base +  4) % 36] ^= col[(base + 0) % 36];
+		col[(base +  9) % 36] ^= col[(base + 0) % 36];
+		col[(base + 18) % 36] ^= col[(base + 0) % 36];
+		col[(base + 27) % 36] ^= col[(base + 0) % 36];
+
+		// ROR9
+		base = (base + 27) % 36;
+
+		// SMIX
+		LOADCOLUMN(r0, 36, 0);
+		SUBSTITUTE(r0, _t1, _t2, _t3, _t0);
+		SUPERMIX(_t2, _t3, _t0, _t1, r0);
+		STORECOLUMN(r0, 36);
+
+		// S4 += S0; S10 += S0; S18 += S0; S27 += S0;
+		col[(base +  4) % 36] ^= col[(base + 0) % 36];
+		col[(base + 10) % 36] ^= col[(base + 0) % 36];
+		col[(base + 18) % 36] ^= col[(base + 0) % 36];
+		col[(base + 27) % 36] ^= col[(base + 0) % 36];
+
+		// ROR9
+		base = (base + 27) % 36;
+
+		// SMIX
+		LOADCOLUMN(r0, 36, 0);
+		SUBSTITUTE(r0, _t1, _t2, _t3, _t0);
+		SUPERMIX(_t2, _t3, _t0, _t1, r0);
+		STORECOLUMN(r0, 36);
+
+		// S4 += S0; S10 += S0; S19 += S0; S27 += S0;
+		col[(base +  4) % 36] ^= col[(base + 0) % 36];
+		col[(base + 10) % 36] ^= col[(base + 0) % 36];
+		col[(base + 19) % 36] ^= col[(base + 0) % 36];
+		col[(base + 27) % 36] ^= col[(base + 0) % 36];
+
+		// ROR9
+		base = (base + 27) % 36;
+
+		// SMIX
+		LOADCOLUMN(r0, 36, 0);
+		SUBSTITUTE(r0, _t1, _t2, _t3, _t0);
+		SUPERMIX(_t2, _t3, _t0, _t1, r0);
+		STORECOLUMN(r0, 36);
+
+		// S4 += S0; S10 += S0; S19 += S0; S28 += S0;
+		col[(base +  4) % 36] ^= col[(base + 0) % 36];
+		col[(base + 10) % 36] ^= col[(base + 0) % 36];
+		col[(base + 19) % 36] ^= col[(base + 0) % 36];
+		col[(base + 28) % 36] ^= col[(base + 0) % 36];
+
+		// ROR8: this last sub-step rotates by 8 columns (+28 mod 36), not 9
+		base = (base + 28) % 36;
+
+		// SMIX
+		LOADCOLUMN(r0, 36, 0);
+		SUBSTITUTE(r0, _t1, _t2, _t3, _t0);
+		SUPERMIX(_t2, _t3, _t0, _t1, r0);
+		STORECOLUMN(r0, 36);
+	}
+
+	// S4 += S0; S9 += S0; S18 += S0; S27 += S0;
+	col[(base +  4) % 36] ^= col[(base + 0) % 36];
+	col[(base +  9) % 36] ^= col[(base + 0) % 36];
+	col[(base + 18) % 36] ^= col[(base + 0) % 36];
+	col[(base + 27) % 36] ^= col[(base + 0) % 36];
+
+	// Store output; S1 || S2 || S3 || S4 (aligned stores: hashval must be 16-byte aligned; no basis change happens in this code path)
+	LOADCOLUMN(r0, 36, 1);
+	_mm_store_si128((__m128i*)hashval, r0);
+
+	// Store output; S9 || S10 || S11 || S12
+	LOADCOLUMN(r0, 36, 9);
+	_mm_store_si128((__m128i*)hashval + 1, r0);
+
+	// Store output; S18 || S19 || S20 || S21
+	LOADCOLUMN(r0, 36, 18);
+	_mm_store_si128((__m128i*)hashval + 2, r0);
+
+	// Store output; S27 || S28 || S29 || S30
+	LOADCOLUMN(r0, 36, 27);
+	_mm_store_si128((__m128i*)hashval + 3, r0);
+}
+
+HashReturn fugue512_Init(hashState_fugue *ctx, int nHashSize)
+{
+	// Reset ctx for a fresh Fugue-512 computation. Always returns SUCCESS.
+	// nHashSize is not consulted: the digest size is fixed at 512 bits.
+	const __m128i *iv = (const __m128i*)_IV512;
+	int k;
+
+	// Lower six registers start out zeroed, the upper six take the IV rows.
+	for(k = 0; k < 6; k++)
+	{
+		ctx->state[k]     = _mm_setzero_si128();
+		ctx->state[k + 6] = _mm_load_si128(iv + k);
+	}
+
+	// Fixed parameters: 512-bit digest, 4-byte message blocks.
+	ctx->uHashSize    = 512;
+	ctx->uBlockLength = 4;
+	// Nothing absorbed or buffered yet.
+	ctx->processed_bits = 0;
+	ctx->uBufferBytes   = 0;
+	ctx->base           = 0;
+	return SUCCESS;
+}
+
+
+HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen)
+{
+	// Absorb databitlen / 8 bytes from data (a bit count that is not a
+	// multiple of 8 loses its remainder). Whole uBlockLength-byte blocks go to
+	// Compress512; a trailing partial block waits in state->buffer.
+	// The cursor is byte-typed because void-pointer arithmetic is not ISO C.
+	const unsigned char *in = (const unsigned char*)data;
+	unsigned int uByteLength, uBlockCount, uRemainingBytes;
+
+	uByteLength = (unsigned int)(databitlen / 8);
+
+	if(state->uBufferBytes + uByteLength < state->uBlockLength)
+	{
+		// Still short of a full block: just stash the new bytes.
+		memcpy(state->buffer + state->uBufferBytes, in, uByteLength);
+		state->uBufferBytes += uByteLength;
+		return SUCCESS;
+	}
+
+	if(state->uBufferBytes != 0)
+	{
+		// Top up the buffered partial block and process it first.
+		const unsigned int uFill = state->uBlockLength - state->uBufferBytes;
+
+		memcpy(state->buffer + state->uBufferBytes, in, uFill);
+		Compress512(state, state->buffer, 1);
+		state->processed_bits += state->uBlockLength * 8;
+		in += uFill;
+		uByteLength -= uFill;
+	}
+
+	// The buffer is drained from here on; feed whole blocks straight from in.
+	uBlockCount = uByteLength / state->uBlockLength;
+	uRemainingBytes = uByteLength % state->uBlockLength;
+
+	if(uBlockCount > 0)
+	{
+		Compress512(state, in, uBlockCount);
+		state->processed_bits += uBlockCount * state->uBlockLength * 8;
+		in += uBlockCount * state->uBlockLength;
+	}
+
+	// Keep the tail (fewer than uBlockLength bytes) for the next call.
+	if(uRemainingBytes > 0)
+		memcpy(state->buffer, in, uRemainingBytes);
+
+	state->uBufferBytes = uRemainingBytes;
+	return SUCCESS;
+}
+
+HashReturn fugue512_Final(hashState_fugue *state, void *hashval)
+{
+	// Finish the hash: flush any buffered partial block (zero padded), append
+	// the total message bit count as eight bytes, most significant first, then
+	// run Final512 to write the digest to hashval. Always returns SUCCESS.
+	BitSequence lengthbuf[8] __attribute__((aligned(64)));
+	const unsigned int pending = state->uBufferBytes;
+	unsigned int shift, k;
+
+	// Bytes still sitting in the buffer count towards the message length.
+	state->processed_bits += pending * 8;
+
+	if(pending != 0)
+	{
+		// Zero the unused tail of the block before compressing it.
+		if(pending != state->uBlockLength)
+			memset(state->buffer + pending, 0, state->uBlockLength - pending);
+		Compress512(state, state->buffer, 1);
+	}
+
+	// Serialise the bit count into eight bytes, shifting down from bit 56.
+	for(k = 0, shift = 56; k < 8; k++, shift -= 8)
+		lengthbuf[k] = (state->processed_bits >> shift) & 0xff;
+
+	// The length fills two 4-byte blocks; absorb both, then finalise.
+	Compress512(state, lengthbuf, 2);
+	Final512(state, hashval);
+	return SUCCESS;
+}
+
+
+HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen)
+{	// One-shot hash: Init, Update, Final on hs; the digest goes to hashval. Always returns SUCCESS.
+	fugue512_Init(hs, 512);
+	fugue512_Update(hs, data, databitlen*8);	// NOTE: despite its name, databitlen is scaled by 8 here, i.e. treated as a byte count
+	fugue512_Final(hs, hashval);
+	return SUCCESS;
+}
+
+#endif  // AES
diff --git a/algo/fugue/fugue-aesni.h b/algo/fugue/fugue-aesni.h
new file mode 100644
index 00000000..92a0a2ef
--- /dev/null
+++ b/algo/fugue/fugue-aesni.h
@@ -0,0 +1,46 @@
+/*
+ * file        : hash_api.h
+ * version     : 1.0.208
+ * date        : 14.12.2010
+ * 
+ * Fugue vperm implementation Hash API
+ *
+ * Cagdas Calik
+ * ccalik@metu.edu.tr
+ * Institute of Applied Mathematics, Middle East Technical University, Turkey.
+ *
+ */
+
+#ifndef FUGUE_HASH_API_H
+#define FUGUE_HASH_API_H
+
+#if defined(__AES__)
+
+#include "algo/sha/sha3_common.h"
+#include <x86intrin.h>
+
+
+typedef struct
+{
+	__m128i			state[12];	// working state: fugue512_Init zeroes [0..5] and loads the IV into [6..11]
+	unsigned int	base;			// which of the three rotated register sets Compress512 uses next (0, 1 or 2)
+
+	unsigned int	uHashSize;		// digest size in bits; fugue512_Init sets it to 512
+	unsigned int	uBlockLength;	// message block size in bytes; fugue512_Init sets it to 4
+	unsigned int	uBufferBytes;	// number of not-yet-processed bytes held in buffer
+	DataLength		processed_bits;	// running count of message bits absorbed so far
+	BitSequence		buffer[4];		// holds a partial block between fugue512_Update calls
+
+} hashState_fugue __attribute__ ((aligned (64)));
+
+HashReturn fugue512_Init(hashState_fugue *state, int hashbitlen);
+
+HashReturn fugue512_Update(hashState_fugue *state, const void *data, DataLength databitlen);
+
+HashReturn fugue512_Final(hashState_fugue *state, void *hashval);
+
+HashReturn fugue512_full(hashState_fugue *hs, void *hashval, const void *data, DataLength databitlen);
+
+#endif // AES
+#endif // FUGUE_HASH_API_H
+
diff --git a/algo/quark/hmq1725-4way.c b/algo/quark/hmq1725-4way.c
index 8832ea79..e742efa8 100644
--- a/algo/quark/hmq1725-4way.c
+++ b/algo/quark/hmq1725-4way.c
@@ -16,7 +16,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/haval-hash-4way.h"
@@ -40,7 +40,7 @@ union _hmq1725_8way_context_overlay
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -363,14 +363,14 @@ extern void hmq1725_8way_hash(void *state, const void *input)
    dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                      hash4, hash5, hash6, hash7, vhash );
 
-   sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-   sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-   sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-   sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-   sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-   sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-   sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-   sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+   fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+   fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+   fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+   fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+   fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+   fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+   fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+   fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
    intrlv_8x64_512( vhash, hash0, hash1, hash2, hash3,
                            hash4, hash5, hash6, hash7 );
@@ -459,21 +459,21 @@ extern void hmq1725_8way_hash(void *state, const void *input)
                                        m512_zero );
 
    if ( hash0[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+      fugue512_full( &ctx.fugue, hash0, hash0, 64 );
    if ( hash1[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+      fugue512_full( &ctx.fugue, hash1, hash1, 64 );
    if ( hash2[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+      fugue512_full( &ctx.fugue, hash2, hash2, 64 );
    if ( hash3[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+      fugue512_full( &ctx.fugue, hash3, hash3, 64 );
    if ( hash4[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+      fugue512_full( &ctx.fugue, hash4, hash4, 64 );
    if ( hash5[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+      fugue512_full( &ctx.fugue, hash5, hash5, 64 );
    if ( hash6[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+      fugue512_full( &ctx.fugue, hash6, hash6, 64 );
    if ( hash7[0] & mask )
-      sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+      fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
    intrlv_8x64_512( vhashA, hash0, hash1, hash2, hash3,
                             hash4, hash5, hash6, hash7 );
@@ -628,7 +628,7 @@ union _hmq1725_4way_context_overlay
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -846,10 +846,10 @@ extern void hmq1725_4way_hash(void *state, const void *input)
 
     dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
-    sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-    sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-    sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-    sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+    fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+    fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+    fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+    fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
     // In this situation serial simd seems to be faster.
 
@@ -920,13 +920,13 @@ extern void hmq1725_4way_hash(void *state, const void *input)
    h_mask = _mm256_movemask_epi8( vh_mask );
 
    if ( hash0[0] & mask ) 
-      sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+      fugue512_full( &ctx.fugue, hash0, hash0, 64 );
    if ( hash1[0] & mask ) 
-      sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+      fugue512_full( &ctx.fugue, hash1, hash1, 64 );
    if ( hash2[0] & mask ) 
-      sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+      fugue512_full( &ctx.fugue, hash2, hash2, 64 );
    if ( hash3[0] & mask ) 
-      sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+      fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
    intrlv_4x64( vhashA, hash0, hash1, hash2, hash3, 512 );
 
diff --git a/algo/quark/hmq1725.c b/algo/quark/hmq1725.c
index ea1ca623..41e3cfcb 100644
--- a/algo/quark/hmq1725.c
+++ b/algo/quark/hmq1725.c
@@ -21,9 +21,11 @@
 #if defined(__AES__)
   #include "algo/groestl/aes_ni/hash-groestl.h"
   #include "algo/echo/aes_ni/hash_api.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 #include "algo/luffa/luffa_for_sse2.h"
 #include "algo/cubehash/cubehash_sse2.h"
@@ -40,7 +42,6 @@ typedef struct {
   sph_shavite512_context  shavite1, shavite2;
   hashState_sd            simd1, simd2;
   sph_hamsi512_context    hamsi1;
-  sph_fugue512_context    fugue1, fugue2;
   sph_shabal512_context   shabal1;
   sph_whirlpool_context   whirlpool1, whirlpool2, whirlpool3, whirlpool4;
   SHA512_CTX              sha1, sha2;
@@ -48,9 +49,11 @@ typedef struct {
 #if defined(__AES__)
   hashState_echo          echo1, echo2;
   hashState_groestl       groestl1, groestl2;
+  hashState_fugue         fugue1, fugue2;
 #else
   sph_groestl512_context  groestl1, groestl2;
   sph_echo512_context     echo1, echo2;
+  sph_fugue512_context    fugue1, fugue2;
 #endif
 } hmq1725_ctx_holder;
 
@@ -88,8 +91,13 @@ void init_hmq1725_ctx()
 
     sph_hamsi512_init(&hmq1725_ctx.hamsi1);
 
+#if defined(__AES__)
+    fugue512_Init( &hmq1725_ctx.fugue1, 512 );
+    fugue512_Init( &hmq1725_ctx.fugue2, 512 );
+#else
     sph_fugue512_init(&hmq1725_ctx.fugue1);
     sph_fugue512_init(&hmq1725_ctx.fugue2);
+#endif
 
     sph_shabal512_init(&hmq1725_ctx.shabal1);
 
@@ -235,8 +243,13 @@ extern void hmq1725hash(void *state, const void *input)
     sph_hamsi512 (&h_ctx.hamsi1, hashA, 64); //3
     sph_hamsi512_close(&h_ctx.hamsi1, hashB); //4
 
+#if defined(__AES__)
+    fugue512_Update( &h_ctx.fugue1, hashB, 512 ); //2   ////
+    fugue512_Final( &h_ctx.fugue1, hashA ); //3 
+#else
     sph_fugue512 (&h_ctx.fugue1, hashB, 64); //2   ////
     sph_fugue512_close(&h_ctx.fugue1, hashA); //3 
+#endif
 
     if ( hashA[0] & mask ) //4
     {
@@ -262,8 +275,13 @@ extern void hmq1725hash(void *state, const void *input)
 
     if ( hashB[0] & mask ) //7
     {
+#if defined(__AES__)
+        fugue512_Update( &h_ctx.fugue2, hashB, 512 ); //
+        fugue512_Final( &h_ctx.fugue2, hashA ); //8
+#else
         sph_fugue512 (&h_ctx.fugue2, hashB, 64); //
         sph_fugue512_close(&h_ctx.fugue2, hashA); //8
+#endif
     }
     else
     {
diff --git a/algo/scrypt/neoscrypt.c b/algo/scrypt/neoscrypt.c
index 9003e59b..7cb4c828 100644
--- a/algo/scrypt/neoscrypt.c
+++ b/algo/scrypt/neoscrypt.c
@@ -1051,16 +1051,16 @@ int scanhash_neoscrypt( struct work *work,
     uint32_t _ALIGN(64) hash[8];
     const uint32_t Htarg = ptarget[7];
     const uint32_t first_nonce = pdata[19];
-    int thr_id = mythr->id;  // thr_id arg is deprecated
+    int thr_id = mythr->id; 
 
     while (pdata[19] < max_nonce && !work_restart[thr_id].restart)
     {
         neoscrypt((uint8_t *) hash, (uint8_t *) pdata );
 
         /* Quick hash check */
-        if (hash[7] <= Htarg && fulltest_le(hash, ptarget)) {
-            *hashes_done = pdata[19] - first_nonce + 1;
-            return 1;
+        if (hash[7] <= Htarg && fulltest_le(hash, ptarget))
+        {
+          submit_solution( work, hash, mythr );
         }
 
         pdata[19]++;
diff --git a/algo/x13/phi1612-4way.c b/algo/x13/phi1612-4way.c
index 192b081a..389c87c5 100644
--- a/algo/x13/phi1612-4way.c
+++ b/algo/x13/phi1612-4way.c
@@ -7,7 +7,7 @@
 #include "algo/jh/jh-hash-4way.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/cubehash/cube-hash-2way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/gost/sph_gost.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #if defined(__VAES__)
@@ -20,7 +20,7 @@ typedef struct {
     skein512_8way_context   skein;
     jh512_8way_context      jh;
     cube_4way_context       cube;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     sph_gost512_context     gost;
 #if defined(__VAES__)
     echo_4way_context       echo;
@@ -36,7 +36,7 @@ void init_phi1612_8way_ctx()
      skein512_8way_init( &phi1612_8way_ctx.skein );
      jh512_8way_init( &phi1612_8way_ctx.jh );
      cube_4way_init( &phi1612_8way_ctx.cube, 512, 16, 32 );
-     sph_fugue512_init( &phi1612_8way_ctx.fugue );
+     fugue512_Init( &phi1612_8way_ctx.fugue, 512 );
      sph_gost512_init( &phi1612_8way_ctx.gost );
 #if defined(__VAES__)
      echo_4way_init( &phi1612_8way_ctx.echo, 512 );
@@ -79,29 +79,14 @@ void phi1612_8way_hash( void *state, const void *input )
      dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
 
      // Fugue
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, hash4 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, hash5 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, hash6 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, hash7 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      // Gost
      sph_gost512( &ctx.gost, hash0, 64 );
@@ -223,7 +208,7 @@ typedef struct {
     skein512_4way_context   skein;
     jh512_4way_context      jh;
     cubehashParam           cube;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     sph_gost512_context     gost;
     hashState_echo          echo;
 } phi1612_4way_ctx_holder;
@@ -235,7 +220,6 @@ void init_phi1612_4way_ctx()
      skein512_4way_init( &phi1612_4way_ctx.skein );
      jh512_4way_init( &phi1612_4way_ctx.jh );
      cubehashInit( &phi1612_4way_ctx.cube, 512, 16, 32 );
-     sph_fugue512_init( &phi1612_4way_ctx.fugue );
      sph_gost512_init( &phi1612_4way_ctx.gost );
      init_echo( &phi1612_4way_ctx.echo, 512 );
 };
@@ -275,17 +259,10 @@ void phi1612_4way_hash( void *state, const void *input )
      cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
 
      // Fugue
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      // Gost
      sph_gost512( &ctx.gost, hash0, 64 );
diff --git a/algo/x13/phi1612.c b/algo/x13/phi1612.c
index bbb86bd4..13c79a3e 100644
--- a/algo/x13/phi1612.c
+++ b/algo/x13/phi1612.c
@@ -8,24 +8,28 @@
 #include <stdio.h>
 #include "algo/gost/sph_gost.h"
 #include "algo/echo/sph_echo.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/skein/sph_skein.h"
 #include "algo/jh/sph_jh.h"
 #ifdef __AES__
   #include "algo/echo/aes_ni/hash_api.h"
+  #include "algo/fugue/fugue-aesni.h"
+#else
+  #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
      sph_skein512_context    skein;
      sph_jh512_context       jh;
      cubehashParam           cube;
-     sph_fugue512_context    fugue;
      sph_gost512_context     gost;
 #ifdef __AES__
      hashState_echo          echo;
+     hashState_fugue         fugue;
 #else
      sph_echo512_context     echo;
+     sph_fugue512_context    fugue;
 #endif
 } phi_ctx_holder;
 
@@ -42,8 +46,10 @@ void init_phi1612_ctx()
      sph_gost512_init( &phi_ctx.gost );
 #ifdef __AES__
      init_echo( &phi_ctx.echo, 512 );
+     fugue512_Init( &phi_ctx.fugue, 512 );
 #else
      sph_echo512_init( &phi_ctx.echo );
+     sph_fugue512_init( &phi_ctx.fugue );
 #endif
 }
 
@@ -69,8 +75,13 @@ void phi1612_hash(void *output, const void *input)
 
      cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 );
 
+#ifdef __AES__
+     fugue512_Update( &ctx.fugue, hash, 512 );
+     fugue512_Final( &ctx.fugue, hash );
+#else
      sph_fugue512( &ctx.fugue, (const void*)hash, 64 );
      sph_fugue512_close( &ctx.fugue, (void*)hash );
+#endif
 
      sph_gost512( &ctx.gost, hash, 64 );
      sph_gost512_close( &ctx.gost, hash );
diff --git a/algo/x13/skunk-4way.c b/algo/x13/skunk-4way.c
index e4698395..73d0205f 100644
--- a/algo/x13/skunk-4way.c
+++ b/algo/x13/skunk-4way.c
@@ -5,7 +5,7 @@
 #include <stdio.h>
 #include "algo/skein/skein-hash-4way.h"
 #include "algo/gost/sph_gost.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/cubehash/cube-hash-2way.h"
 
@@ -14,7 +14,7 @@
 typedef struct {
     skein512_8way_context skein;
     cube_4way_context     cube;
-    sph_fugue512_context  fugue;
+    hashState_fugue       fugue;
     sph_gost512_context   gost;
 } skunk_8way_ctx_holder;
 
@@ -46,29 +46,15 @@ void skunk_8way_hash( void *output, const void *input )
      cube_4way_init( &ctx.cube, 512, 16, 32 );           
      cube_4way_update_close( &ctx.cube, vhash, vhash, 64 );  
      dintrlv_4x128_512( hash4, hash5, hash6, hash7, vhash );
-     
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, hash4 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, hash5 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, hash6 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, hash7 );
+
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      sph_gost512( &ctx.gost, hash0, 64 );
      sph_gost512_close( &ctx.gost, output );
@@ -140,7 +126,6 @@ bool skunk_8way_thread_init()
 {
    skein512_8way_init( &skunk_8way_ctx.skein );
    cube_4way_init( &skunk_8way_ctx.cube, 512, 16, 32 );
-   sph_fugue512_init( &skunk_8way_ctx.fugue );
    sph_gost512_init( &skunk_8way_ctx.gost );
    return true;
 }
@@ -150,7 +135,7 @@ bool skunk_8way_thread_init()
 typedef struct {
     skein512_4way_context skein;
     cubehashParam         cube;
-    sph_fugue512_context  fugue;
+    hashState_fugue       fugue;
     sph_gost512_context   gost;
 } skunk_4way_ctx_holder;
 
@@ -178,17 +163,10 @@ void skunk_4way_hash( void *output, const void *input )
      memcpy( &ctx.cube, &skunk_4way_ctx.cube, sizeof(cubehashParam) );
      cubehashUpdateDigest( &ctx.cube, (byte*)hash3, (const byte*) hash3, 64 );
 
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     sph_fugue512_init( &ctx.fugue );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      sph_gost512( &ctx.gost, hash0, 64 );
      sph_gost512_close( &ctx.gost, hash0 );
@@ -252,7 +230,6 @@ bool skunk_4way_thread_init()
 {
    skein512_4way_init( &skunk_4way_ctx.skein );
    cubehashInit( &skunk_4way_ctx.cube, 512, 16, 32 );
-   sph_fugue512_init( &skunk_4way_ctx.fugue );
    sph_gost512_init( &skunk_4way_ctx.gost );
    return true;
 }
diff --git a/algo/x13/skunk-gate.c b/algo/x13/skunk-gate.c
index 98bcf682..ab6c4ec8 100644
--- a/algo/x13/skunk-gate.c
+++ b/algo/x13/skunk-gate.c
@@ -2,7 +2,7 @@
 
 bool register_skunk_algo( algo_gate_t* gate )
 {
-   gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT;
+   gate->optimizations = SSE2_OPT | AVX2_OPT | AVX512_OPT | AES_OPT;
 #if defined (SKUNK_8WAY)
    gate->miner_thread_init = (void*)&skunk_8way_thread_init;
    gate->scanhash = (void*)&scanhash_skunk_8way;
diff --git a/algo/x13/skunk.c b/algo/x13/skunk.c
index 41728657..25549ff6 100644
--- a/algo/x13/skunk.c
+++ b/algo/x13/skunk.c
@@ -8,13 +8,21 @@
 #include <stdio.h>
 #include "algo/gost/sph_gost.h"
 #include "algo/skein/sph_skein.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/cubehash/cubehash_sse2.h"
+#if defined(__AES__)
+  #include "algo/fugue/fugue-aesni.h"
+#else
+  #include "algo/fugue/sph_fugue.h"
+#endif
 
 typedef struct {
     sph_skein512_context  skein;
     cubehashParam         cube;
+#if defined(__AES__)
+    hashState_fugue       fugue;
+#else
     sph_fugue512_context  fugue;
+#endif
     sph_gost512_context   gost;
 } skunk_ctx_holder;
 
@@ -32,8 +40,13 @@ void skunkhash( void *output, const void *input )
 
      cubehashUpdateDigest( &ctx.cube, (byte*) hash, (const byte*)hash, 64 );
 
+#if defined(__AES__)
+     fugue512_Update( &ctx.fugue, hash, 512 );
+     fugue512_Final( &ctx.fugue, hash );
+#else
      sph_fugue512( &ctx.fugue, hash, 64 );
      sph_fugue512_close( &ctx.fugue, hash );
+#endif
 
      sph_gost512( &ctx.gost, hash, 64 );
      sph_gost512_close( &ctx.gost, hash );
@@ -87,8 +100,12 @@ bool skunk_thread_init()
 {
    sph_skein512_init( &skunk_ctx.skein );
    cubehashInit( &skunk_ctx.cube, 512, 16, 32 );
-   sph_fugue512_init( &skunk_ctx.fugue );
+#if defined(__AES__)
+   fugue512_Init( &skunk_ctx.fugue, 512 );
+#else
+   sph_fugue512_init( &skunk_ctx.fugue );
+#endif
    sph_gost512_init( &skunk_ctx.gost );
    return true;
 }
 #endif
diff --git a/algo/x13/x13-4way.c b/algo/x13/x13-4way.c
index 6518655a..f2b0d739 100644
--- a/algo/x13/x13-4way.c
+++ b/algo/x13/x13-4way.c
@@ -16,7 +16,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #if defined(__VAES__)
   #include "algo/groestl/groestl512-hash-4way.h"
   #include "algo/shavite/shavite-hash-4way.h"
@@ -35,7 +35,7 @@ typedef struct {
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
 #if defined(__VAES__)
     groestl512_4way_context groestl;
     shavite512_4way_context shavite;
@@ -60,7 +60,7 @@ void init_x13_8way_ctx()
      cube_4way_init( &x13_8way_ctx.cube, 512, 16, 32 );
      simd_4way_init( &x13_8way_ctx.simd, 512 );
      hamsi512_8way_init( &x13_8way_ctx.hamsi );
-     sph_fugue512_init( &x13_8way_ctx.fugue );
+     fugue512_Init( &x13_8way_ctx.fugue, 512 );
 #if defined(__VAES__)
      groestl512_4way_init( &x13_8way_ctx.groestl, 64 );
      shavite512_4way_init( &x13_8way_ctx.shavite );
@@ -255,29 +255,29 @@ void x13_8way_hash( void *state, const void *input )
                        vhash );
 
      // 13 Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, hash4 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, hash5 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, hash6 );
-     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, hash7 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash4, 512 );
+     fugue512_Final( &ctx.fugue, hash4 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash5, 512 );
+     fugue512_Final( &ctx.fugue, hash5 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash6, 512 );
+     fugue512_Final( &ctx.fugue, hash6 );
+     memcpy( &ctx.fugue, &x13_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash7, 512 );
+     fugue512_Final( &ctx.fugue, hash7 );
      
      memcpy( state,     hash0, 32 );
      memcpy( state+ 32, hash1, 32 );
@@ -344,7 +344,7 @@ typedef struct {
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
 } x13_4way_ctx_holder;
 
 x13_4way_ctx_holder x13_4way_ctx __attribute__ ((aligned (64)));
@@ -363,7 +363,7 @@ void init_x13_4way_ctx()
      simd_2way_init( &x13_4way_ctx.simd, 512 );
      init_echo( &x13_4way_ctx.echo, 512 );
      hamsi512_4way_init( &x13_4way_ctx.hamsi );
-     sph_fugue512_init( &x13_4way_ctx.fugue );
+     fugue512_Init( &x13_4way_ctx.fugue, 512 );
 };
 
 void x13_4way_hash( void *state, const void *input )
@@ -477,17 +477,17 @@ void x13_4way_hash( void *state, const void *input )
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
      // 13 Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x13_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
 
      memcpy( state,    hash0, 32 );
      memcpy( state+32, hash1, 32 );
diff --git a/algo/x13/x13.c b/algo/x13/x13.c
index 109729e0..ca66e007 100644
--- a/algo/x13/x13.c
+++ b/algo/x13/x13.c
@@ -13,7 +13,6 @@
 #include "algo/skein/sph_skein.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/luffa/luffa_for_sse2.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/simd/nist.h"
@@ -21,9 +20,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -32,9 +33,11 @@ typedef struct {
 #if defined(__AES__)
    hashState_echo          echo;
    hashState_groestl       groestl;
+   hashState_fugue         fugue;
 #else
    sph_groestl512_context   groestl;
    sph_echo512_context      echo;
+   sph_fugue512_context    fugue;
 #endif
    sph_jh512_context       jh;
    sph_keccak512_context   keccak;
@@ -44,7 +47,6 @@ typedef struct {
    sph_shavite512_context  shavite;
    hashState_sd            simd;
    sph_hamsi512_context    hamsi;
-   sph_fugue512_context    fugue;
 } x13_ctx_holder;
 
 x13_ctx_holder x13_ctx;
@@ -56,9 +58,11 @@ void init_x13_ctx()
 #if defined(__AES__)
    init_groestl( &x13_ctx.groestl, 64 );
    init_echo( &x13_ctx.echo, 512 );
+   fugue512_Init( &x13_ctx.fugue, 512 );
 #else
    sph_groestl512_init( &x13_ctx.groestl );
    sph_echo512_init( &x13_ctx.echo );
+   sph_fugue512_init( &x13_ctx.fugue );
 #endif
    sph_skein512_init( &x13_ctx.skein );
    sph_jh512_init( &x13_ctx.jh );
@@ -68,7 +72,6 @@ void init_x13_ctx()
    sph_shavite512_init( &x13_ctx.shavite );
    init_sd( &x13_ctx.simd, 512 );
    sph_hamsi512_init( &x13_ctx.hamsi );
-   sph_fugue512_init( &x13_ctx.fugue );
 };
 
 void x13hash(void *output, const void *input)
@@ -84,11 +87,9 @@ void x13hash(void *output, const void *input)
     sph_bmw512_close( &ctx.bmw, hash );
 
 #if defined(__AES__)
-    init_groestl( &ctx.groestl, 64 );
     update_and_final_groestl( &ctx.groestl, (char*)hash,
                                       (const char*)hash, 512 );
 #else
-    sph_groestl512_init( &ctx.groestl );
     sph_groestl512( &ctx.groestl, hash, 64 );
     sph_groestl512_close( &ctx.groestl, hash );
 #endif
@@ -125,8 +126,13 @@ void x13hash(void *output, const void *input)
     sph_hamsi512( &ctx.hamsi, hash, 64 );
     sph_hamsi512_close( &ctx.hamsi, hash );
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, 512 );
+    fugue512_Final( &ctx.fugue, hash );
+#else
     sph_fugue512( &ctx.fugue, hash, 64 );
     sph_fugue512_close( &ctx.fugue, hash );
+#endif
 
 	 memcpy( output, hash, 32 );
 }
diff --git a/algo/x13/x13bcd-4way.c b/algo/x13/x13bcd-4way.c
index 188b18bc..0f978e56 100644
--- a/algo/x13/x13bcd-4way.c
+++ b/algo/x13/x13bcd-4way.c
@@ -16,7 +16,7 @@
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/sm3/sm3-hash-4way.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #if defined(__VAES__)
   #include "algo/groestl/groestl512-hash-4way.h"
   #include "algo/shavite/shavite-hash-4way.h"
@@ -35,7 +35,7 @@ typedef struct {
     simd_4way_context       simd;
     sm3_8way_ctx_t          sm3;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
 #if defined(__VAES__)
     groestl512_4way_context groestl;
     shavite512_4way_context shavite;
@@ -61,7 +61,7 @@ void init_x13bcd_8way_ctx()
      simd_4way_init( &x13bcd_8way_ctx.simd, 512 );
      sm3_8way_init( &x13bcd_8way_ctx.sm3 );
      hamsi512_8way_init( &x13bcd_8way_ctx.hamsi );
-     sph_fugue512_init( &x13bcd_8way_ctx.fugue );
+     fugue512_Init( &x13bcd_8way_ctx.fugue, 512 );
 #if defined(__VAES__)
      groestl512_4way_init( &x13bcd_8way_ctx.groestl, 64 );
      shavite512_4way_init( &x13bcd_8way_ctx.shavite );
@@ -257,36 +257,30 @@ void x13bcd_8way_hash( void *state, const void *input )
                        hash4, hash5, hash6, hash7, vhash );
 
      // Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, state );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, state+32 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, state+64 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, state+96 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, state+128 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, state+160 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, state+192 );
-     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, state+224 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, state );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, state+32 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, state+64 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, state+96 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash4, 512 );
+     fugue512_Final( &ctx.fugue, state+128 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash5, 512 );
+     fugue512_Final( &ctx.fugue, state+160 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash6, 512 );
+     fugue512_Final( &ctx.fugue, state+192 );
+     memcpy( &ctx.fugue, &x13bcd_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash7, 512 );
+     fugue512_Final( &ctx.fugue, state+224 );
+
 }
 
 int scanhash_x13bcd_8way( struct work *work, uint32_t max_nonce,
@@ -346,7 +340,7 @@ typedef struct {
     hashState_echo          echo;
     sm3_4way_ctx_t          sm3;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
 } x13bcd_4way_ctx_holder;
 
 x13bcd_4way_ctx_holder x13bcd_4way_ctx __attribute__ ((aligned (64)));
@@ -366,7 +360,7 @@ void init_x13bcd_4way_ctx()
      init_echo( &x13bcd_4way_ctx.echo, 512 );
      sm3_4way_init( &x13bcd_4way_ctx.sm3 );
      hamsi512_4way_init( &x13bcd_4way_ctx.hamsi );
-     sph_fugue512_init( &x13bcd_4way_ctx.fugue );
+     fugue512_Init( &x13bcd_4way_ctx.fugue, 512 );
 };
 
 void x13bcd_4way_hash( void *state, const void *input )
@@ -489,20 +483,17 @@ void x13bcd_4way_hash( void *state, const void *input )
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
      // Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue,
-                         sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x13bcd_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
 
      memcpy( state,    hash0, 32 );
      memcpy( state+32, hash1, 32 );
diff --git a/algo/x13/x13bcd.c b/algo/x13/x13bcd.c
index d53e37b1..c397bb74 100644
--- a/algo/x13/x13bcd.c
+++ b/algo/x13/x13bcd.c
@@ -14,16 +14,17 @@
 #include "algo/skein/sph_skein.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/simd/nist.h"
 
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -32,9 +33,11 @@ typedef struct {
 #if defined(__AES__)
    hashState_echo          echo;
    hashState_groestl       groestl;
+   hashState_fugue         fugue;
 #else
    sph_groestl512_context   groestl;
    sph_echo512_context      echo;
+   sph_fugue512_context    fugue;
 #endif
    sph_jh512_context       jh;
    sph_keccak512_context   keccak;
@@ -43,7 +46,6 @@ typedef struct {
    sph_shavite512_context  shavite;
    hashState_sd            simd;
    sph_hamsi512_context    hamsi;
-   sph_fugue512_context    fugue;
    sm3_ctx_t               sm3;
 } x13bcd_ctx_holder;
 
@@ -56,9 +58,11 @@ void init_x13bcd_ctx()
 #if defined(__AES__)
    init_groestl( &x13bcd_ctx.groestl, 64 );
    init_echo( &x13bcd_ctx.echo, 512 );
+   fugue512_Init( &x13bcd_ctx.fugue, 512 );
 #else
    sph_groestl512_init( &x13bcd_ctx.groestl );
    sph_echo512_init( &x13bcd_ctx.echo );
+   sph_fugue512_init( &x13bcd_ctx.fugue );
 #endif
    sph_skein512_init( &x13bcd_ctx.skein );
    sph_jh512_init( &x13bcd_ctx.jh );
@@ -68,7 +72,6 @@ void init_x13bcd_ctx()
    init_sd( &x13bcd_ctx.simd,512 );
    sm3_init( &x13bcd_ctx.sm3 );
    sph_hamsi512_init( &x13bcd_ctx.hamsi );
-   sph_fugue512_init( &x13bcd_ctx.fugue );
 };
 
 void x13bcd_hash(void *output, const void *input)
@@ -129,8 +132,13 @@ void x13bcd_hash(void *output, const void *input)
     sph_hamsi512( &ctx.hamsi, hash, 64 );
     sph_hamsi512_close( &ctx.hamsi, hash );
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, 512 );
+    fugue512_Final( &ctx.fugue, hash );
+#else
     sph_fugue512( &ctx.fugue, hash, 64 );
     sph_fugue512_close( &ctx.fugue, hash );
+#endif
 
     memcpy( output, hash, 32 );
 }
diff --git a/algo/x14/x14-4way.c b/algo/x14/x14-4way.c
index 5ac8d8a5..e81b901a 100644
--- a/algo/x14/x14-4way.c
+++ b/algo/x14/x14-4way.c
@@ -17,7 +17,7 @@
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/echo/sph_echo.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #if defined(__VAES__)
   #include "algo/groestl/groestl512-hash-4way.h"
@@ -37,7 +37,7 @@ typedef struct {
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
 #if defined(__VAES__)
     groestl512_4way_context groestl;
@@ -63,7 +63,7 @@ void init_x14_8way_ctx()
      cube_4way_init( &x14_8way_ctx.cube, 512, 16, 32 );
      simd_4way_init( &x14_8way_ctx.simd, 512 );
      hamsi512_8way_init( &x14_8way_ctx.hamsi );
-     sph_fugue512_init( &x14_8way_ctx.fugue );
+     fugue512_Init( &x14_8way_ctx.fugue, 512 );
      shabal512_8way_init( &x14_8way_ctx.shabal );
 #if defined(__VAES__)
      groestl512_4way_init( &x14_8way_ctx.groestl, 64 );
@@ -259,29 +259,29 @@ void x14_8way_hash( void *state, const void *input )
                        vhash );
 
      // 13 Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, hash4 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, hash5 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, hash6 );
-     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, hash7 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash4, 512 );
+     fugue512_Final( &ctx.fugue, hash4 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash5, 512 );
+     fugue512_Final( &ctx.fugue, hash5 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash6, 512 );
+     fugue512_Final( &ctx.fugue, hash6 );
+     memcpy( &ctx.fugue, &x14_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash7, 512 );
+     fugue512_Final( &ctx.fugue, hash7 );
 
      // 14 Shabal, parallel 32 bit
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -348,7 +348,7 @@ typedef struct {
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
 } x14_4way_ctx_holder;
 
@@ -368,7 +368,7 @@ void init_x14_4way_ctx()
      simd_2way_init( &x14_4way_ctx.simd, 512 );
      init_echo( &x14_4way_ctx.echo, 512 );
      hamsi512_4way_init( &x14_4way_ctx.hamsi );
-     sph_fugue512_init( &x14_4way_ctx.fugue );
+     fugue512_Init( &x14_4way_ctx.fugue, 512 );
      shabal512_4way_init( &x14_4way_ctx.shabal );
 };
 
@@ -483,17 +483,17 @@ void x14_4way_hash( void *state, const void *input )
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
      // 13 Fugue serial
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x14_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
 
      // 14 Shabal, parallel 32 bit
      intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
diff --git a/algo/x14/x14.c b/algo/x14/x14.c
index 83eab967..fdbcacb7 100644
--- a/algo/x14/x14.c
+++ b/algo/x14/x14.c
@@ -13,7 +13,6 @@
 #include "algo/skein/sph_skein.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/luffa/luffa_for_sse2.h"
 #include "algo/cubehash/cubehash_sse2.h"
@@ -21,9 +20,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -32,9 +33,11 @@ typedef struct {
 #if defined(__AES__)
    hashState_groestl       groestl;
    hashState_echo          echo;
+   hashState_fugue         fugue;
 #else
    sph_groestl512_context  groestl;
    sph_echo512_context     echo;
+   sph_fugue512_context    fugue;
 #endif
    sph_jh512_context       jh;
    sph_keccak512_context   keccak;
@@ -44,7 +47,6 @@ typedef struct {
    sph_shavite512_context  shavite;
    hashState_sd            simd;
    sph_hamsi512_context    hamsi;
-   sph_fugue512_context    fugue;
    sph_shabal512_context   shabal;
 } x14_ctx_holder;
 
@@ -57,9 +59,11 @@ void init_x14_ctx()
 #if defined(__AES__)
    init_groestl( &x14_ctx.groestl, 64 );
    init_echo( &x14_ctx.echo, 512 );
+   fugue512_Init( &x14_ctx.fugue, 512 );
 #else
    sph_groestl512_init( &x14_ctx.groestl );
    sph_echo512_init( &x14_ctx.echo );
+   sph_fugue512_init( &x14_ctx.fugue );
 #endif
    sph_skein512_init( &x14_ctx.skein );
    sph_jh512_init( &x14_ctx.jh );
@@ -69,7 +73,6 @@ void init_x14_ctx()
    sph_shavite512_init( &x14_ctx.shavite );
    init_sd( &x14_ctx.simd,512 );
    sph_hamsi512_init( &x14_ctx.hamsi );
-   sph_fugue512_init( &x14_ctx.fugue );
    sph_shabal512_init( &x14_ctx.shabal );
 };
 
@@ -125,8 +128,13 @@ void x14hash(void *output, const void *input)
     sph_hamsi512(&ctx.hamsi, hash, 64);
     sph_hamsi512_close(&ctx.hamsi, hash);
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, 512 );
+    fugue512_Final( &ctx.fugue, hash );
+#else
     sph_fugue512(&ctx.fugue, hash, 64);
     sph_fugue512_close(&ctx.fugue, hash);
+#endif
 
     sph_shabal512( &ctx.shabal, hash, 64 );
 	 sph_shabal512_close( &ctx.shabal, hash );
diff --git a/algo/x15/x15-4way.c b/algo/x15/x15-4way.c
index 7fff408a..281f87ba 100644
--- a/algo/x15/x15-4way.c
+++ b/algo/x15/x15-4way.c
@@ -17,7 +17,7 @@
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/echo/sph_echo.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #if defined(__VAES__)
@@ -38,7 +38,7 @@ typedef struct {
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
 #if defined(__VAES__)
@@ -65,7 +65,7 @@ void init_x15_8way_ctx()
      cube_4way_init( &x15_8way_ctx.cube, 512, 16, 32 );
      simd_4way_init( &x15_8way_ctx.simd, 512 );
      hamsi512_8way_init( &x15_8way_ctx.hamsi );
-     sph_fugue512_init( &x15_8way_ctx.fugue );
+     fugue512_Init( &x15_8way_ctx.fugue, 512 );
      shabal512_8way_init( &x15_8way_ctx.shabal );
      sph_whirlpool_init( &x15_8way_ctx.whirlpool );
 #if defined(__VAES__)
@@ -260,30 +260,29 @@ void x15_8way_hash( void *state, const void *input )
                        vhash );
 
      // 13 Fugue
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash4, 64 );
-     sph_fugue512_close( &ctx.fugue, hash4 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash5, 64 );
-     sph_fugue512_close( &ctx.fugue, hash5 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash6, 64 );
-     sph_fugue512_close( &ctx.fugue, hash6 );
-     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash7, 64 );
-     sph_fugue512_close( &ctx.fugue, hash7 );
-
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash4, 512 );
+     fugue512_Final( &ctx.fugue, hash4 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash5, 512 );
+     fugue512_Final( &ctx.fugue, hash5 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash6, 512 );
+     fugue512_Final( &ctx.fugue, hash6 );
+     memcpy( &ctx.fugue, &x15_8way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash7, 512 );
+     fugue512_Final( &ctx.fugue, hash7 );
 
      // 14 Shabal, parallel 32 bit
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
@@ -387,7 +386,7 @@ typedef struct {
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
 } x15_4way_ctx_holder;
@@ -408,7 +407,7 @@ void init_x15_4way_ctx()
      simd_2way_init( &x15_4way_ctx.simd, 512 );
      init_echo( &x15_4way_ctx.echo, 512 );
      hamsi512_4way_init( &x15_4way_ctx.hamsi );
-     sph_fugue512_init( &x15_4way_ctx.fugue );
+     fugue512_Init( &x15_4way_ctx.fugue, 512 );
      shabal512_4way_init( &x15_4way_ctx.shabal );
      sph_whirlpool_init( &x15_4way_ctx.whirlpool );
 };
@@ -524,17 +523,17 @@ void x15_4way_hash( void *state, const void *input )
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, 512 );
 
      // 13 Fugue
-     sph_fugue512( &ctx.fugue, hash0, 64 );
-     sph_fugue512_close( &ctx.fugue, hash0 );
-     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash1, 64 );
-     sph_fugue512_close( &ctx.fugue, hash1 );
-     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash2, 64 );
-     sph_fugue512_close( &ctx.fugue, hash2 );
-     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(sph_fugue512_context) );
-     sph_fugue512( &ctx.fugue, hash3, 64 );
-     sph_fugue512_close( &ctx.fugue, hash3 );
+     fugue512_Update( &ctx.fugue, hash0, 512 );
+     fugue512_Final( &ctx.fugue, hash0 );
+     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash1, 512 );
+     fugue512_Final( &ctx.fugue, hash1 );
+     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash2, 512 );
+     fugue512_Final( &ctx.fugue, hash2 );
+     memcpy( &ctx.fugue, &x15_4way_ctx.fugue, sizeof(hashState_fugue) );
+     fugue512_Update( &ctx.fugue, hash3, 512 );
+     fugue512_Final( &ctx.fugue, hash3 );
 
      // 14 Shabal, parallel 32 bit
      intrlv_4x32( vhash, hash0, hash1, hash2, hash3, 512 );
diff --git a/algo/x15/x15.c b/algo/x15/x15.c
index c739e910..73d64db9 100644
--- a/algo/x15/x15.c
+++ b/algo/x15/x15.c
@@ -23,9 +23,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -34,9 +36,11 @@ typedef struct {
 #if defined(__AES__)
    hashState_echo          echo;
    hashState_groestl       groestl;
+   hashState_fugue         fugue;
 #else
    sph_groestl512_context   groestl;
    sph_echo512_context      echo;
+   sph_fugue512_context    fugue;
 #endif
    sph_jh512_context       jh;
    sph_keccak512_context   keccak;
@@ -46,7 +50,6 @@ typedef struct {
    sph_shavite512_context  shavite;
    hashState_sd            simd;
    sph_hamsi512_context    hamsi;
-   sph_fugue512_context    fugue;
    sph_shabal512_context   shabal;
    sph_whirlpool_context   whirlpool;
 } x15_ctx_holder;
@@ -60,9 +63,11 @@ void init_x15_ctx()
 #if defined(__AES__)
    init_groestl( &x15_ctx.groestl, 64 );
    init_echo( &x15_ctx.echo, 512 );
+   fugue512_Init( &x15_ctx.fugue, 512 );
 #else
    sph_groestl512_init( &x15_ctx.groestl );
    sph_echo512_init( &x15_ctx.echo );
+   sph_fugue512_init( &x15_ctx.fugue );
 #endif
    sph_skein512_init( &x15_ctx.skein );
    sph_jh512_init( &x15_ctx.jh );
@@ -72,7 +77,6 @@ void init_x15_ctx()
    sph_shavite512_init( &x15_ctx.shavite );
    init_sd( &x15_ctx.simd, 512 );
    sph_hamsi512_init( &x15_ctx.hamsi );
-   sph_fugue512_init( &x15_ctx.fugue );
    sph_shabal512_init( &x15_ctx.shabal );
    sph_whirlpool_init( &x15_ctx.whirlpool );
 };
@@ -131,8 +135,13 @@ void x15hash(void *output, const void *input)
     sph_hamsi512( &ctx.hamsi, hash, 64 );
     sph_hamsi512_close( &ctx.hamsi, hash );
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, 512 );
+    fugue512_Final( &ctx.fugue, hash );
+#else
     sph_fugue512( &ctx.fugue, hash, 64 );
     sph_fugue512_close( &ctx.fugue, hash );
+#endif
 
     sph_shabal512( &ctx.shabal, hash, 64 );
     sph_shabal512_close( &ctx.shabal, hash );
diff --git a/algo/x16/hex.c b/algo/x16/hex.c
index bb08526c..ada1ca71 100644
--- a/algo/x16/hex.c
+++ b/algo/x16/hex.c
@@ -6,30 +6,6 @@
  */
 #include "x16r-gate.h"
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "algo/blake/sph_blake.h"
-#include "algo/bmw/sph_bmw.h"
-#include "algo/groestl/sph_groestl.h"
-#include "algo/jh/sph_jh.h"
-#include "algo/keccak/sph_keccak.h"
-#include "algo/skein/sph_skein.h"
-#include "algo/shavite/sph_shavite.h"
-#include "algo/luffa/luffa_for_sse2.h"
-#include "algo/cubehash/cubehash_sse2.h"
-#include "algo/simd/nist.h"
-#include "algo/echo/sph_echo.h"
-#include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
-#include "algo/shabal/sph_shabal.h"
-#include "algo/whirlpool/sph_whirlpool.h"
-#include <openssl/sha.h>
-#if defined(__AES__)
-  #include "algo/echo/aes_ni/hash_api.h"
-  #include "algo/groestl/aes_ni/hash-groestl.h"
-#endif
-
 static void hex_getAlgoString(const uint32_t* prevblock, char *output)
 {
    char *sptr = output;
@@ -47,34 +23,6 @@ static void hex_getAlgoString(const uint32_t* prevblock, char *output)
    *sptr = '\0';
 }
 
-/*
-union _hex_context_overlay
-{
-#if defined(__AES__)
-        hashState_echo          echo;
-        hashState_groestl       groestl;
-#else
-        sph_groestl512_context   groestl;
-        sph_echo512_context      echo;
-#endif
-        sph_blake512_context    blake;
-        sph_bmw512_context      bmw;
-        sph_skein512_context    skein;
-        sph_jh512_context       jh;
-        sph_keccak512_context   keccak;
-        hashState_luffa         luffa;
-        cubehashParam           cube;
-        shavite512_context      shavite;
-        hashState_sd            simd;
-        sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
-        sph_shabal512_context   shabal;
-        sph_whirlpool_context   whirlpool;
-        SHA512_CTX              sha512;
-};
-typedef union _hex_context_overlay hex_context_overlay;
-*/
-
 static __thread x16r_context_overlay hex_ctx;
 
 int hex_hash( void* output, const void* input, int thrid )
@@ -187,8 +135,12 @@ int hex_hash( void* output, const void* input, int thrid )
             sph_hamsi512_close( &ctx.hamsi, hash );
          break;
          case FUGUE:
+#if defined(__AES__)
+             fugue512_full( &ctx.fugue, hash, in, size );
+#else
              sph_fugue512_full( &ctx.fugue, hash, in, size );
-         break;
+#endif
+	     break;
          case SHABAL:
             if ( i == 0 ) 
                sph_shabal512( &ctx.shabal, in+64, 16 );
diff --git a/algo/x16/minotaur.c b/algo/x16/minotaur.c
index 069bf971..99575640 100644
--- a/algo/x16/minotaur.c
+++ b/algo/x16/minotaur.c
@@ -15,16 +15,17 @@
 #include "algo/cubehash/cubehash_sse2.h"
 #include "algo/simd/nist.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include <openssl/sha.h>
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/echo/sph_echo.h"
   #include "algo/groestl/sph_groestl.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 // Config
@@ -34,13 +35,16 @@ typedef struct TortureNode TortureNode;
 typedef struct TortureGarden TortureGarden;
 
 // Graph of hash algos plus SPH contexts
-struct TortureGarden {
+struct TortureGarden
+{
 #if defined(__AES__)
         hashState_echo          echo;
         hashState_groestl       groestl;
+        hashState_fugue         fugue;
 #else
-        sph_echo512_context      echo;
-        sph_groestl512_context   groestl;
+        sph_echo512_context     echo;
+        sph_groestl512_context  groestl;
+        sph_fugue512_context    fugue;
 #endif
         sph_blake512_context    blake;
         sph_bmw512_context      bmw;
@@ -52,15 +56,13 @@ struct TortureGarden {
         shavite512_context      shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
 
     struct TortureNode {
         unsigned int algo;
-        TortureNode *childLeft;
-        TortureNode *childRight;
+        TortureNode *child[2];
     } nodes[22];
 } __attribute__ ((aligned (64)));
 
@@ -97,10 +99,12 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
 #endif
 	    break;
         case 4:
-            sph_fugue512_init(&garden->fugue);
-            sph_fugue512(&garden->fugue, input, 64);
-            sph_fugue512_close(&garden->fugue, hash);          
-            break;
+#if defined(__AES__)
+            fugue512_full( &garden->fugue, hash, input, 64 );
+#else
+            sph_fugue512_full( &garden->fugue, hash, input, 64 );
+#endif
+	    break;
         case 5:
 #if defined(__AES__)
             groestl512_full( &garden->groestl, (char*)hash, (char*)input, 512 );
@@ -162,68 +166,61 @@ static void get_hash( void *output, const void *input, TortureGarden *garden,
             break;
     }
 
-    // Output the hash
     memcpy(output, hash, 64);
 }
 
-// Recursively traverse a given torture garden starting with a given hash and given node within the garden. The hash is overwritten with the final hash.
-static void traverse_garden( TortureGarden *garden, void *hash,
-	                     TortureNode *node )
-{
-    unsigned char partialHash[64] __attribute__ ((aligned (64)));
-    get_hash(partialHash, hash, garden, node->algo);
-
-    if ( partialHash[63] % 2 == 0 )
-    {   // Last byte of output hash is even
-        if ( node->childLeft != NULL )
-            traverse_garden( garden, partialHash, node->childLeft );
-    }
-    else
-    {   // Last byte of output hash is odd
-        if ( node->childRight != NULL )
-            traverse_garden( garden, partialHash, node->childRight );
-    }
-
-    memcpy( hash, partialHash, 64 );
-}
-
-// Associate child nodes with a parent node
-static inline void link_nodes( TortureNode *parent, TortureNode *childLeft,
-	                       TortureNode *childRight ) 
-{
-    parent->childLeft = childLeft;
-    parent->childRight = childRight;
-}
-
 static __thread TortureGarden garden;
 
 bool initialize_torture_garden()
 {
     // Create torture garden nodes. Note that both sides of 19 and 20 lead to 21, and 21 has no children (to make traversal complete).
-    link_nodes(&garden.nodes[0], &garden.nodes[1], &garden.nodes[2]);
-    link_nodes(&garden.nodes[1], &garden.nodes[3], &garden.nodes[4]);
-    link_nodes(&garden.nodes[2], &garden.nodes[5], &garden.nodes[6]);
-    link_nodes(&garden.nodes[3], &garden.nodes[7], &garden.nodes[8]);
-    link_nodes(&garden.nodes[4], &garden.nodes[9], &garden.nodes[10]);
-    link_nodes(&garden.nodes[5], &garden.nodes[11], &garden.nodes[12]);
-    link_nodes(&garden.nodes[6], &garden.nodes[13], &garden.nodes[14]);
-    link_nodes(&garden.nodes[7], &garden.nodes[15], &garden.nodes[16]);
-    link_nodes(&garden.nodes[8], &garden.nodes[15], &garden.nodes[16]);
-    link_nodes(&garden.nodes[9], &garden.nodes[15], &garden.nodes[16]);
-    link_nodes(&garden.nodes[10], &garden.nodes[15], &garden.nodes[16]);
-    link_nodes(&garden.nodes[11], &garden.nodes[17], &garden.nodes[18]);
-    link_nodes(&garden.nodes[12], &garden.nodes[17], &garden.nodes[18]);
-    link_nodes(&garden.nodes[13], &garden.nodes[17], &garden.nodes[18]);
-    link_nodes(&garden.nodes[14], &garden.nodes[17], &garden.nodes[18]);
-    link_nodes(&garden.nodes[15], &garden.nodes[19], &garden.nodes[20]);
-    link_nodes(&garden.nodes[16], &garden.nodes[19], &garden.nodes[20]);
-    link_nodes(&garden.nodes[17], &garden.nodes[19], &garden.nodes[20]);
-    link_nodes(&garden.nodes[18], &garden.nodes[19], &garden.nodes[20]);
-    link_nodes(&garden.nodes[19], &garden.nodes[21], &garden.nodes[21]);
-    link_nodes(&garden.nodes[20], &garden.nodes[21], &garden.nodes[21]);
-    garden.nodes[21].childLeft = NULL;
-    garden.nodes[21].childRight = NULL;
-    return true;
+
+   garden.nodes[ 0].child[0] = &garden.nodes[ 1];   // child[0]: followed when the last hash byte is even
+   garden.nodes[ 0].child[1] = &garden.nodes[ 2];   // child[1]: followed when the last hash byte is odd
+   garden.nodes[ 1].child[0] = &garden.nodes[ 3];
+   garden.nodes[ 1].child[1] = &garden.nodes[ 4];
+   garden.nodes[ 2].child[0] = &garden.nodes[ 5];
+   garden.nodes[ 2].child[1] = &garden.nodes[ 6];
+   garden.nodes[ 3].child[0] = &garden.nodes[ 7];
+   garden.nodes[ 3].child[1] = &garden.nodes[ 8];
+   garden.nodes[ 4].child[0] = &garden.nodes[ 9];
+   garden.nodes[ 4].child[1] = &garden.nodes[10];
+   garden.nodes[ 5].child[0] = &garden.nodes[11];
+   garden.nodes[ 5].child[1] = &garden.nodes[12];
+   garden.nodes[ 6].child[0] = &garden.nodes[13];
+   garden.nodes[ 6].child[1] = &garden.nodes[14];
+   garden.nodes[ 7].child[0] = &garden.nodes[15];
+   garden.nodes[ 7].child[1] = &garden.nodes[16];
+   garden.nodes[ 8].child[0] = &garden.nodes[15];
+   garden.nodes[ 8].child[1] = &garden.nodes[16];
+   garden.nodes[ 9].child[0] = &garden.nodes[15];
+   garden.nodes[ 9].child[1] = &garden.nodes[16];
+   garden.nodes[10].child[0] = &garden.nodes[15];
+   garden.nodes[10].child[1] = &garden.nodes[16];
+   garden.nodes[11].child[0] = &garden.nodes[17];
+   garden.nodes[11].child[1] = &garden.nodes[18];
+   garden.nodes[12].child[0] = &garden.nodes[17];
+   garden.nodes[12].child[1] = &garden.nodes[18];
+   garden.nodes[13].child[0] = &garden.nodes[17];
+   garden.nodes[13].child[1] = &garden.nodes[18];
+   garden.nodes[14].child[0] = &garden.nodes[17];
+   garden.nodes[14].child[1] = &garden.nodes[18];
+   garden.nodes[15].child[0] = &garden.nodes[19];
+   garden.nodes[15].child[1] = &garden.nodes[20];
+   garden.nodes[16].child[0] = &garden.nodes[19];
+   garden.nodes[16].child[1] = &garden.nodes[20];
+   garden.nodes[17].child[0] = &garden.nodes[19];
+   garden.nodes[17].child[1] = &garden.nodes[20];
+   garden.nodes[18].child[0] = &garden.nodes[19];
+   garden.nodes[18].child[1] = &garden.nodes[20];
+   garden.nodes[19].child[0] = &garden.nodes[21];
+   garden.nodes[19].child[1] = &garden.nodes[21];
+   garden.nodes[20].child[0] = &garden.nodes[21];
+   garden.nodes[20].child[1] = &garden.nodes[21];
+   garden.nodes[21].child[0] = NULL;   // node 21 is terminal: its NULL children end the traversal loop
+   garden.nodes[21].child[1] = NULL;
+
+   return true;
 }
 
 // Produce a 32-byte hash from 80-byte input data
@@ -236,20 +233,67 @@ int minotaur_hash( void *output, const void *input, int thr_id )
     SHA512_Update( &garden.sha512, input, 80 );
     SHA512_Final( (unsigned char*) hash, &garden.sha512 );
 
+    // Algo 6 (Hamsi) is very slow, so it is faster to skip a nonce whose
+    // path uses it. Before traversal only the first (node 0) and last
+    // (node 21) algos on the path are known; abort if either is Hamsi.
+    if ( ( ( hash[ 0] % MINOTAUR_ALGO_COUNT ) == 6 )
+      || ( ( hash[21] % MINOTAUR_ALGO_COUNT ) == 6 ) )
+         return 0;
+
     // Assign algos to torture garden nodes based on initial hash
     for ( int i = 0; i < 22; i++ )
         garden.nodes[i].algo = hash[i] % MINOTAUR_ALGO_COUNT;
 
     // Send the initial hash through the torture garden
-    traverse_garden( &garden, hash, &garden.nodes[0] );
+    TortureNode *node = &garden.nodes[0];
 
-    memcpy( output, hash, 32 );
+    while ( node )
+    {
+      get_hash( hash, hash, &garden, node->algo );
+      node = node->child[ hash[63] & 1 ];
+    }
 
+    memcpy( output, hash, 32 );
     return 1;
 }
 
+int scanhash_minotaur( struct work *work, uint32_t max_nonce,
+                      uint64_t *hashes_done, struct thr_info *mythr )
+{  // Hashes successive nonces starting at work->data[19] and submits any result that passes valid_hash().
+   uint32_t edata[20] __attribute__((aligned(64)));   // working copy of the 80-byte input (20 words)
+   uint32_t hash[8] __attribute__((aligned(64)));     // receives the 32-byte hash for each nonce
+   uint32_t *pdata = work->data;
+   uint32_t *ptarget = work->target;
+   const uint32_t first_nonce = pdata[19];            // word 19 carries the nonce
+   const uint32_t last_nonce = max_nonce - 1;
+   uint32_t n = first_nonce;
+   const int thr_id = mythr->id;
+   const bool bench = opt_benchmark;
+   uint64_t skipped = 0;                              // nonces for which the hash call returned 0
+
+   mm128_bswap32_80( edata, pdata );   // presumably byte-swaps the 20 input words into edata -- TODO confirm
+   do
+   {
+      edata[19] = n;
+      if ( likely( algo_gate.hash( hash, edata, thr_id ) ) )   // 0 means the hash was abandoned for this nonce
+      {
+	 if ( unlikely( valid_hash( hash, ptarget ) && !bench ) )   // nothing is submitted when opt_benchmark is set
+         {
+            pdata[19] = bswap_32( n );
+            submit_solution( work, hash, mythr );
+         }
+      }
+      else skipped++;
+      n++;
+   } while ( n < last_nonce && !work_restart[thr_id].restart );   // stop at the range end or on a restart request
+   *hashes_done = n - first_nonce - skipped;   // count only nonces that were hashed to completion
+   pdata[19] = n;                              // leave the next unscanned nonce in the work data
+   return 0;
+}
+
 bool register_minotaur_algo( algo_gate_t* gate )
 {
+  gate->scanhash = (void*)&scanhash_minotaur;
   gate->hash      = (void*)&minotaur_hash;
   gate->optimizations = SSE2_OPT | AES_OPT | AVX2_OPT | AVX512_OPT;
   gate->miner_thread_init = (void*)&initialize_torture_garden;
diff --git a/algo/x16/x16r-4way.c b/algo/x16/x16r-4way.c
index 79559619..abbe16a3 100644
--- a/algo/x16/x16r-4way.c
+++ b/algo/x16/x16r-4way.c
@@ -347,14 +347,14 @@ int x16r_8way_hash_generic( void* output, const void* input, int thrid )
                           hash7, vhash );
          break;
          case FUGUE:
-             sph_fugue512_full( &ctx.fugue, hash0, in0, size );
-             sph_fugue512_full( &ctx.fugue, hash1, in1, size );
-             sph_fugue512_full( &ctx.fugue, hash2, in2, size );
-             sph_fugue512_full( &ctx.fugue, hash3, in3, size );
-             sph_fugue512_full( &ctx.fugue, hash4, in4, size );
-             sph_fugue512_full( &ctx.fugue, hash5, in5, size );
-             sph_fugue512_full( &ctx.fugue, hash6, in6, size );
-             sph_fugue512_full( &ctx.fugue, hash7, in7, size );
+             fugue512_full( &ctx.fugue, hash0, in0, size );
+             fugue512_full( &ctx.fugue, hash1, in1, size );
+             fugue512_full( &ctx.fugue, hash2, in2, size );
+             fugue512_full( &ctx.fugue, hash3, in3, size );
+             fugue512_full( &ctx.fugue, hash4, in4, size );
+             fugue512_full( &ctx.fugue, hash5, in5, size );
+             fugue512_full( &ctx.fugue, hash6, in6, size );
+             fugue512_full( &ctx.fugue, hash7, in7, size );
          break;
          case SHABAL:
              intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -747,10 +747,10 @@ int x16r_4way_hash_generic( void* output, const void* input, int thrid )
             dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
          break;
          case FUGUE:
-             sph_fugue512_full( &ctx.fugue, hash0, in0, size );
-             sph_fugue512_full( &ctx.fugue, hash1, in1, size );
-             sph_fugue512_full( &ctx.fugue, hash2, in2, size );
-             sph_fugue512_full( &ctx.fugue, hash3, in3, size );
+             fugue512_full( &ctx.fugue, hash0, in0, size );
+             fugue512_full( &ctx.fugue, hash1, in1, size );
+             fugue512_full( &ctx.fugue, hash2, in2, size );
+             fugue512_full( &ctx.fugue, hash3, in3, size );
          break;
          case SHABAL:
              intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
diff --git a/algo/x16/x16r-gate.h b/algo/x16/x16r-gate.h
index 0c373b33..cbd3899c 100644
--- a/algo/x16/x16r-gate.h
+++ b/algo/x16/x16r-gate.h
@@ -24,6 +24,7 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #endif
 #if defined (__AVX2__)
 #include "algo/blake/blake-hash-4way.h"
@@ -111,7 +112,7 @@ union _x16r_8way_context_overlay
     cubehashParam           cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -155,7 +156,7 @@ union _x16r_4way_context_overlay
     shavite512_context      shavite;
     simd_2way_context       simd;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -180,9 +181,11 @@ union _x16r_context_overlay
 #if defined(__AES__)
         hashState_echo          echo;
         hashState_groestl       groestl;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context   groestl;
         sph_echo512_context      echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_blake512_context    blake;
         sph_bmw512_context      bmw;
@@ -194,7 +197,6 @@ union _x16r_context_overlay
         shavite512_context      shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
diff --git a/algo/x16/x16r.c b/algo/x16/x16r.c
index e64feb0e..b8cab348 100644
--- a/algo/x16/x16r.c
+++ b/algo/x16/x16r.c
@@ -151,8 +151,12 @@ int x16r_hash_generic( void* output, const void* input, int thrid )
             sph_hamsi512_close( &ctx.hamsi, hash );
          break;
          case FUGUE:
-            sph_fugue512_full( &ctx.fugue, hash, in, size );
-         break;
+#if defined(__AES__)
+         fugue512_full( &ctx.fugue, hash, in, size );
+#else
+	 sph_fugue512_full( &ctx.fugue, hash, in, size );
+#endif
+	 break;
          case SHABAL:
             if ( i == 0 )
                sph_shabal512( &ctx.shabal, in+64, 16 );
diff --git a/algo/x16/x16rv2-4way.c b/algo/x16/x16rv2-4way.c
index 1db130c4..e2d80dab 100644
--- a/algo/x16/x16rv2-4way.c
+++ b/algo/x16/x16rv2-4way.c
@@ -8,30 +8,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include "algo/blake/blake-hash-4way.h"
-#include "algo/bmw/bmw-hash-4way.h"
-#include "algo/groestl/aes_ni/hash-groestl.h"
-#include "algo/groestl/aes_ni/hash-groestl.h"
-#include "algo/skein/skein-hash-4way.h"
-#include "algo/jh/jh-hash-4way.h"
-#include "algo/keccak/keccak-hash-4way.h"
-#include "algo/shavite/sph_shavite.h"
-#include "algo/luffa/luffa-hash-2way.h"
-#include "algo/cubehash/cubehash_sse2.h"
-#include "algo/cubehash/cube-hash-2way.h"
-#include "algo/simd/simd-hash-2way.h"
-#include "algo/echo/aes_ni/hash_api.h"
-#include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
-#include "algo/shabal/shabal-hash-4way.h"
-#include "algo/whirlpool/sph_whirlpool.h"
-#include "algo/sha/sha-hash-4way.h"
 #include "algo/tiger/sph_tiger.h"
-#if defined(__VAES__)
-  #include "algo/groestl/groestl512-hash-4way.h"
-  #include "algo/shavite/shavite-hash-4way.h"
-  #include "algo/echo/echo-hash-4way.h"
-#endif
 
 #if defined (X16RV2_8WAY)
 
@@ -46,7 +23,7 @@ union _x16rv2_8way_context_overlay
     cubehashParam           cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -432,14 +409,14 @@ int x16rv2_8way_hash( void* output, const void* input, int thrid )
                           hash7, vhash );
          break;
          case FUGUE:
-            sph_fugue512_full( &ctx.fugue, hash0, in0, size );
-            sph_fugue512_full( &ctx.fugue, hash1, in1, size );
-            sph_fugue512_full( &ctx.fugue, hash2, in2, size );
-            sph_fugue512_full( &ctx.fugue, hash3, in3, size );
-            sph_fugue512_full( &ctx.fugue, hash4, in4, size );
-            sph_fugue512_full( &ctx.fugue, hash5, in5, size );
-            sph_fugue512_full( &ctx.fugue, hash6, in6, size );
-            sph_fugue512_full( &ctx.fugue, hash7, in7, size );
+            fugue512_full( &ctx.fugue, hash0, in0, size );
+            fugue512_full( &ctx.fugue, hash1, in1, size );
+            fugue512_full( &ctx.fugue, hash2, in2, size );
+            fugue512_full( &ctx.fugue, hash3, in3, size );
+            fugue512_full( &ctx.fugue, hash4, in4, size );
+            fugue512_full( &ctx.fugue, hash5, in5, size );
+            fugue512_full( &ctx.fugue, hash6, in6, size );
+            fugue512_full( &ctx.fugue, hash7, in7, size );
          break;
          case SHABAL:
             intrlv_8x32( vhash, in0, in1, in2, in3, in4, in5, in6, in7,
@@ -705,7 +682,7 @@ union _x16rv2_4way_context_overlay
     shavite512_context      shavite;
     simd_2way_context       simd;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -946,10 +923,10 @@ int x16rv2_4way_hash( void* output, const void* input, int thrid )
             dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
          break;
          case FUGUE:
-            sph_fugue512_full( &ctx.fugue, hash0, in0, size );
-            sph_fugue512_full( &ctx.fugue, hash1, in1, size );
-            sph_fugue512_full( &ctx.fugue, hash2, in2, size );
-            sph_fugue512_full( &ctx.fugue, hash3, in3, size );
+            fugue512_full( &ctx.fugue, hash0, in0, size );
+            fugue512_full( &ctx.fugue, hash1, in1, size );
+            fugue512_full( &ctx.fugue, hash2, in2, size );
+            fugue512_full( &ctx.fugue, hash3, in3, size );
          break;
          case SHABAL:
              intrlv_4x32( vhash, in0, in1, in2, in3, size<<3 );
diff --git a/algo/x16/x16rv2.c b/algo/x16/x16rv2.c
index c0a5eac9..62109329 100644
--- a/algo/x16/x16rv2.c
+++ b/algo/x16/x16rv2.c
@@ -8,41 +8,18 @@
 
 #if !defined(X16R_8WAY) && !defined(X16R_4WAY)
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "algo/blake/sph_blake.h"
-#include "algo/bmw/sph_bmw.h"
-#include "algo/groestl/sph_groestl.h"
-#include "algo/jh/sph_jh.h"
-#include "algo/keccak/sph_keccak.h"
-#include "algo/skein/sph_skein.h"
-#include "algo/shavite/sph_shavite.h"
-#include "algo/luffa/luffa_for_sse2.h"
-#include "algo/cubehash/cubehash_sse2.h"
-#include "algo/simd/nist.h"
-#include "algo/echo/sph_echo.h"
-#include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
-#include "algo/shabal/sph_shabal.h"
-#include "algo/whirlpool/sph_whirlpool.h"
-#include <openssl/sha.h>
 #include "algo/tiger/sph_tiger.h"
-#if defined(__AES__)
-  #include "algo/echo/aes_ni/hash_api.h"
-  #include "algo/groestl/aes_ni/hash-groestl.h"
-#endif
-
-static __thread uint32_t s_ntime = UINT32_MAX;
 
 union _x16rv2_context_overlay
 {
 #if defined(__AES__)
         hashState_echo          echo;
         hashState_groestl       groestl;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context   groestl;
         sph_echo512_context      echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_blake512_context    blake;
         sph_bmw512_context      bmw;
@@ -54,7 +31,6 @@ union _x16rv2_context_overlay
         shavite512_context      shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -160,8 +136,12 @@ int x16rv2_hash( void* output, const void* input, int thrid )
              sph_hamsi512_close( &ctx.hamsi, hash );
          break;
          case FUGUE:
+#if defined(__AES__)  // fugue-aesni.c is only compiled when __AES__ is defined
+             fugue512_full( &ctx.fugue, hash, in, size );
+#else
              sph_fugue512_full( &ctx.fugue, hash, in, size );
-         break;
+#endif
+         break;
          case SHABAL:
              sph_shabal512_init( &ctx.shabal );
              sph_shabal512( &ctx.shabal, in, size );
diff --git a/algo/x17/sonoa-4way.c b/algo/x17/sonoa-4way.c
index a9e6b631..e4fe98b0 100644
--- a/algo/x17/sonoa-4way.c
+++ b/algo/x17/sonoa-4way.c
@@ -16,7 +16,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/haval-hash-4way.h"
@@ -40,7 +40,7 @@ union _sonoa_8way_context_overlay
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -423,14 +423,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      if ( work_restart[thr_id].restart ) return 0;
 // 4
@@ -554,14 +554,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                       hash7 );
@@ -755,14 +755,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                       hash7 );
@@ -905,14 +905,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                       hash7 );
@@ -1074,14 +1074,14 @@ int sonoa_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                       hash7 );
@@ -1134,7 +1134,7 @@ union _sonoa_4way_context_overlay
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -1327,10 +1327,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      if ( work_restart[thr_id].restart ) return 0;
 // 4
@@ -1393,10 +1393,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
@@ -1496,10 +1496,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
@@ -1576,10 +1576,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
@@ -1669,10 +1669,10 @@ int sonoa_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
diff --git a/algo/x17/sonoa.c b/algo/x17/sonoa.c
index 19dbcb7c..4253ad89 100644
--- a/algo/x17/sonoa.c
+++ b/algo/x17/sonoa.c
@@ -14,7 +14,6 @@
 #include "algo/skein/sph_skein.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/sph-haval.h"
@@ -25,9 +24,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -36,9 +37,11 @@ typedef struct {
 #if defined(__AES__)
         hashState_echo          echo;
         hashState_groestl       groestl;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context  groestl;
         sph_echo512_context     echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_jh512_context       jh;
         sph_keccak512_context   keccak;
@@ -48,7 +51,6 @@ typedef struct {
         sph_shavite512_context  shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -64,9 +66,11 @@ void init_sonoa_ctx()
 #if defined(__AES__)
         init_echo( &sonoa_ctx.echo, 512 );
         init_groestl( &sonoa_ctx.groestl, 64 );
+        fugue512_Init( &sonoa_ctx.fugue, 512 );
 #else
         sph_groestl512_init(&sonoa_ctx.groestl );
         sph_echo512_init( &sonoa_ctx.echo );
+        sph_fugue512_init( &sonoa_ctx.fugue );
 #endif
         sph_skein512_init( &sonoa_ctx.skein);
         sph_jh512_init( &sonoa_ctx.jh);
@@ -76,7 +80,6 @@ void init_sonoa_ctx()
         sph_shavite512_init( &sonoa_ctx.shavite );
         init_sd( &sonoa_ctx.simd, 512 );
         sph_hamsi512_init( &sonoa_ctx.hamsi );
-        sph_fugue512_init( &sonoa_ctx.fugue );
         sph_shabal512_init( &sonoa_ctx.shabal );
         sph_whirlpool_init( &sonoa_ctx.whirlpool );
         SHA512_Init( &sonoa_ctx.sha512 );
@@ -249,8 +252,13 @@ int sonoa_hash( void *state, const void *input, int thr_id )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 
+#if defined(__AES__)  // NIST-style API: length is given in bits (512 == 64 bytes)
+   fugue512_Update( &ctx.fugue, hash, 512 );
+   fugue512_Final( &ctx.fugue, hash );
+#else
    sph_fugue512(&ctx.fugue, hash, 64);
    sph_fugue512_close(&ctx.fugue, hash);
+#endif
 
    if ( work_restart[thr_id].restart ) return 0;
 //
@@ -311,9 +319,11 @@ int sonoa_hash( void *state, const void *input, int thr_id )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512(&ctx.fugue, hash, 64);
-   sph_fugue512_close(&ctx.fugue, hash);
+#if defined(__AES__)  // fugue-aesni.c is only compiled when __AES__ is defined
+   fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
+   sph_fugue512_full( &ctx.fugue, hash, hash, 64 );
+#endif
 
    sph_shabal512(&ctx.shabal, hash, 64);
    sph_shabal512_close(&ctx.shabal, hash);
@@ -399,9 +409,11 @@ int sonoa_hash( void *state, const void *input, int thr_id )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512(&ctx.fugue, hash, 64);
-   sph_fugue512_close(&ctx.fugue, hash);
+#if defined(__AES__)  // fugue-aesni.c is only compiled when __AES__ is defined
+   fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
+   sph_fugue512_full( &ctx.fugue, hash, hash, 64 );
+#endif
 
    sph_shabal512_init( &ctx.shabal );
    sph_shabal512(&ctx.shabal, hash, 64);
@@ -468,9 +480,11 @@ int sonoa_hash( void *state, const void *input, int thr_id )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512(&ctx.fugue, hash, 64);
-   sph_fugue512_close(&ctx.fugue, hash);
+#if defined(__AES__)  // fugue-aesni.c is only compiled when __AES__ is defined
+   fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
+   sph_fugue512_full( &ctx.fugue, hash, hash, 64 );
+#endif
 
    sph_shabal512_init( &ctx.shabal );
    sph_shabal512(&ctx.shabal, hash, 64);
@@ -546,9 +560,11 @@ int sonoa_hash( void *state, const void *input, int thr_id )
    sph_hamsi512(&ctx.hamsi, hash, 64);
    sph_hamsi512_close(&ctx.hamsi, hash);
 
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512(&ctx.fugue, hash, 64);
-   sph_fugue512_close(&ctx.fugue, hash);
+#if defined(__AES__)  // fugue-aesni.c is only compiled when __AES__ is defined
+   fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
+   sph_fugue512_full( &ctx.fugue, hash, hash, 64 );
+#endif
 
    sph_shabal512_init( &ctx.shabal );
    sph_shabal512(&ctx.shabal, hash, 64);
diff --git a/algo/x17/x17-4way.c b/algo/x17/x17-4way.c
index 4fe98bc2..cce38943 100644
--- a/algo/x17/x17-4way.c
+++ b/algo/x17/x17-4way.c
@@ -21,7 +21,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/haval-hash-4way.h"
@@ -49,7 +49,7 @@ union _x17_8way_context_overlay
 #endif
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -190,14 +190,14 @@ int x17_8way_hash( void *state, const void *input, int thr_id )
      dintrlv_8x64_512( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                        vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, 64 );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, 64 );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, 64 );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+     fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+     fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+     fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
      intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                       hash7 );
@@ -250,7 +250,7 @@ union _x17_4way_context_overlay
     simd_2way_context       simd;
     hashState_echo          echo;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -328,10 +328,10 @@ int x17_4way_hash( void *state, const void *input, int thr_id )
 
      dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, 64 );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, 64 );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, 64 );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+     fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+     fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+     fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+     fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
      intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
diff --git a/algo/x17/x17.c b/algo/x17/x17.c
index dbfb1049..e6a9a06a 100644
--- a/algo/x17/x17.c
+++ b/algo/x17/x17.c
@@ -13,7 +13,6 @@
 #include "algo/skein/sph_skein.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/sph-haval.h"
@@ -22,11 +21,13 @@
 #include "algo/simd/nist.h"
 #include <openssl/sha.h>
 #if defined(__AES__)
+  #include "algo/fugue/fugue-aesni.h"
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 union _x17_context_overlay
@@ -36,9 +37,11 @@ union _x17_context_overlay
 #if defined(__AES__)
         hashState_groestl       groestl;
         hashState_echo          echo;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context  groestl;
         sph_echo512_context     echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_jh512_context       jh;
         sph_keccak512_context   keccak;
@@ -48,7 +51,6 @@ union _x17_context_overlay
         sph_shavite512_context  shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -122,9 +124,11 @@ int x17_hash(void *output, const void *input, int thr_id )
     sph_hamsi512_close( &ctx.hamsi, hash );
 
     // 13 Fugue
-    sph_fugue512_init( &ctx.fugue );
-    sph_fugue512(&ctx.fugue, hash, 64 );
-    sph_fugue512_close(&ctx.fugue, hash );
+#if defined(__AES__)
+    fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
+    sph_fugue512_full( &ctx.fugue, hash, hash, 64 );
+#endif
 
     // X14 Shabal
     sph_shabal512_init( &ctx.shabal );
diff --git a/algo/x17/xevan-4way.c b/algo/x17/xevan-4way.c
index fbf5d26c..beb9df6e 100644
--- a/algo/x17/xevan-4way.c
+++ b/algo/x17/xevan-4way.c
@@ -16,7 +16,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/echo/aes_ni/hash_api.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/sha/sha-hash-4way.h"
@@ -40,7 +40,7 @@ union _xevan_8way_context_overlay
    cube_4way_context       cube;
    simd_4way_context       simd;
    hamsi512_8way_context   hamsi;
-   sph_fugue512_context    fugue;
+   hashState_fugue         fugue;
    shabal512_8way_context  shabal;
    sph_whirlpool_context   whirlpool;
    sha512_8way_context     sha512;
@@ -192,14 +192,14 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
      dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                    vhash, dataLen<<3 );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
+     fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
+     fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
+     fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
+     fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
+     fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
+     fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
+     fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
+     fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
 
      intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                   hash7, dataLen<<3 );
@@ -355,14 +355,14 @@ int xevan_8way_hash( void *output, const void *input, int thr_id )
      dintrlv_8x64( hash0, hash1, hash2, hash3, hash4, hash5, hash6, hash7,
                    vhash, dataLen<<3 );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
+     fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
+     fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
+     fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
+     fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
+     fugue512_full( &ctx.fugue, hash4, hash4, dataLen );
+     fugue512_full( &ctx.fugue, hash5, hash5, dataLen );
+     fugue512_full( &ctx.fugue, hash6, hash6, dataLen );
+     fugue512_full( &ctx.fugue, hash7, hash7, dataLen );
 
      intrlv_8x32( vhash, hash0, hash1, hash2, hash3, hash4, hash5, hash6,
                   hash7, dataLen<<3 );
@@ -415,7 +415,7 @@ union _xevan_4way_context_overlay
         simd_2way_context       simd;
         hashState_echo          echo;
         hamsi512_4way_context   hamsi;
-        sph_fugue512_context    fugue;
+        hashState_fugue         fugue;
         shabal512_4way_context  shabal;
         sph_whirlpool_context   whirlpool;
         sha512_4way_context     sha512;
@@ -498,10 +498,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
 
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
+     fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
+     fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
+     fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
+     fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
 
      // Parallel 4way 32 bit
      intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
@@ -595,10 +595,10 @@ int xevan_4way_hash( void *output, const void *input, int thr_id )
 
      dintrlv_4x64( hash0, hash1, hash2, hash3, vhash, dataLen<<3 );
 
-     sph_fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
-     sph_fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
+     fugue512_full( &ctx.fugue, hash0, hash0, dataLen );
+     fugue512_full( &ctx.fugue, hash1, hash1, dataLen );
+     fugue512_full( &ctx.fugue, hash2, hash2, dataLen );
+     fugue512_full( &ctx.fugue, hash3, hash3, dataLen );
 
      intrlv_4x32( vhash, hash0, hash1, hash2, hash3, dataLen<<3 );
 
diff --git a/algo/x17/xevan.c b/algo/x17/xevan.c
index 38bfa564..08ed580e 100644
--- a/algo/x17/xevan.c
+++ b/algo/x17/xevan.c
@@ -15,7 +15,6 @@
 #include "algo/shavite/sph_shavite.h"
 #include "algo/luffa/luffa_for_sse2.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/haval/sph-haval.h"
@@ -25,9 +24,11 @@
 #if defined(__AES__)
   #include "algo/groestl/aes_ni/hash-groestl.h"
   #include "algo/echo/aes_ni/hash_api.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 
 typedef struct {
@@ -41,7 +42,6 @@ typedef struct {
         sph_shavite512_context  shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -49,9 +49,11 @@ typedef struct {
 #if defined(__AES__)
         hashState_echo          echo;
         hashState_groestl       groestl;
+        hashState_fugue         fugue;
 #else
 	sph_groestl512_context  groestl;
         sph_echo512_context     echo;
+        sph_fugue512_context    fugue;
 #endif
 } xevan_ctx_holder;
 
@@ -69,7 +71,6 @@ void init_xevan_ctx()
         sph_shavite512_init( &xevan_ctx.shavite );
         init_sd( &xevan_ctx.simd, 512 );
         sph_hamsi512_init( &xevan_ctx.hamsi );
-        sph_fugue512_init( &xevan_ctx.fugue );
         sph_shabal512_init( &xevan_ctx.shabal );
         sph_whirlpool_init( &xevan_ctx.whirlpool );
         SHA512_Init( &xevan_ctx.sha512 );
@@ -77,9 +78,11 @@ void init_xevan_ctx()
 #if defined(__AES__)
         init_groestl( &xevan_ctx.groestl, 64 );
         init_echo( &xevan_ctx.echo, 512 );
+        fugue512_Init( &xevan_ctx.fugue, 512 );
 #else
 	sph_groestl512_init( &xevan_ctx.groestl );
         sph_echo512_init( &xevan_ctx.echo );
+        sph_fugue512_init( &xevan_ctx.fugue );
 #endif
 };
 
@@ -137,8 +140,13 @@ int xevan_hash(void *output, const void *input, int thr_id )
 	sph_hamsi512(&ctx.hamsi, hash, dataLen);
 	sph_hamsi512_close(&ctx.hamsi, hash);
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, dataLen*8 );
+    fugue512_Final( &ctx.fugue, hash ); 
+#else
 	sph_fugue512(&ctx.fugue, hash, dataLen);
 	sph_fugue512_close(&ctx.fugue, hash);
+#endif
 
 	sph_shabal512(&ctx.shabal, hash, dataLen);
 	sph_shabal512_close(&ctx.shabal, hash);
@@ -202,8 +210,13 @@ int xevan_hash(void *output, const void *input, int thr_id )
 	sph_hamsi512(&ctx.hamsi, hash, dataLen);
 	sph_hamsi512_close(&ctx.hamsi, hash);
 
+#if defined(__AES__)
+    fugue512_Update( &ctx.fugue, hash, dataLen*8 );
+    fugue512_Final( &ctx.fugue, hash );   
+#else
 	sph_fugue512(&ctx.fugue, hash, dataLen);
 	sph_fugue512_close(&ctx.fugue, hash);
+#endif
 
 	sph_shabal512(&ctx.shabal, hash, dataLen);
 	sph_shabal512_close(&ctx.shabal, hash);
diff --git a/algo/x22/x22i-4way.c b/algo/x22/x22i-4way.c
index 8d519ee6..e61d1add 100644
--- a/algo/x22/x22i-4way.c
+++ b/algo/x22/x22i-4way.c
@@ -13,7 +13,7 @@
 #include "algo/simd/simd-hash-2way.h"
 #include "algo/shavite/sph_shavite.h"
 #include "algo/hamsi/hamsi-hash-4way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/shabal/shabal-hash-4way.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/sha/sha-hash-4way.h"
@@ -42,7 +42,7 @@ union _x22i_8way_ctx_overlay
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -225,30 +225,14 @@ int x22i_8way_hash( void *output, const void *input, int thrid )
    dintrlv_8x64_512( hash0, hash1, hash2, hash3,
                      hash4, hash5, hash6, hash7, vhash );
    
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash0, 64 );
-   sph_fugue512_close( &ctx.fugue, hash0 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash1, 64 );
-   sph_fugue512_close( &ctx.fugue, hash1 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash2, 64 );
-   sph_fugue512_close( &ctx.fugue, hash2 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash3, 64 );
-   sph_fugue512_close( &ctx.fugue, hash3 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash4, 64 );
-   sph_fugue512_close( &ctx.fugue, hash4 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash5, 64 );
-   sph_fugue512_close( &ctx.fugue, hash5 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash6, 64 );
-   sph_fugue512_close( &ctx.fugue, hash6 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash7, 64 );
-   sph_fugue512_close( &ctx.fugue, hash7 );
+   fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+   fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+   fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+   fugue512_full( &ctx.fugue, hash3, hash3, 64 );
+   fugue512_full( &ctx.fugue, hash4, hash4, 64 );
+   fugue512_full( &ctx.fugue, hash5, hash5, 64 );
+   fugue512_full( &ctx.fugue, hash6, hash6, 64 );
+   fugue512_full( &ctx.fugue, hash7, hash7, 64 );
 
    intrlv_8x32_512( vhash, hash0, hash1, hash2, hash3,
                            hash4, hash5, hash6, hash7 );
@@ -520,7 +504,7 @@ union _x22i_4way_ctx_overlay
     shavite512_2way_context shavite;
     simd_2way_context       simd;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -607,18 +591,10 @@ int x22i_4way_hash( void *output, const void *input, int thrid )
    hamsi512_4way_close( &ctx.hamsi, vhash );
    dintrlv_4x64_512( hash0, hash1, hash2, hash3, vhash );
 
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash0, 64 );
-   sph_fugue512_close( &ctx.fugue, hash0 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash1, 64 );
-   sph_fugue512_close( &ctx.fugue, hash1 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash2, 64 );
-   sph_fugue512_close( &ctx.fugue, hash2 );
-   sph_fugue512_init( &ctx.fugue );
-   sph_fugue512( &ctx.fugue, hash3, 64 );
-   sph_fugue512_close( &ctx.fugue, hash3 );
+   fugue512_full( &ctx.fugue, hash0, hash0, 64 );
+   fugue512_full( &ctx.fugue, hash1, hash1, 64 );
+   fugue512_full( &ctx.fugue, hash2, hash2, 64 );
+   fugue512_full( &ctx.fugue, hash3, hash3, 64 );
 
    intrlv_4x32_512( vhash, hash0, hash1, hash2, hash3 );
 
diff --git a/algo/x22/x22i.c b/algo/x22/x22i.c
index f3504204..247ea4ae 100644
--- a/algo/x22/x22i.c
+++ b/algo/x22/x22i.c
@@ -7,9 +7,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 #include "algo/skein/sph_skein.h"
 #include "algo/jh/sph_jh.h"
@@ -19,7 +21,6 @@
 #include "algo/shavite/sph_shavite.h"
 #include "algo/simd/nist.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include <openssl/sha.h>
@@ -36,9 +37,11 @@ union _x22i_context_overlay
 #if defined(__AES__)
         hashState_groestl       groestl;
         hashState_echo          echo;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context  groestl;
         sph_echo512_context     echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_jh512_context       jh;
         sph_keccak512_context   keccak;
@@ -48,7 +51,6 @@ union _x22i_context_overlay
         sph_shavite512_context  shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -129,9 +131,13 @@ int x22i_hash( void *output, const void *input, int thrid )
 	sph_hamsi512(&ctx.hamsi, (const void*) hash, 64);
 	sph_hamsi512_close(&ctx.hamsi, hash);
 
+#if defined(__AES__)
+        fugue512_full( &ctx.fugue, hash, hash, 64 );
+#else
 	sph_fugue512_init(&ctx.fugue);
 	sph_fugue512(&ctx.fugue, (const void*) hash, 64);
 	sph_fugue512_close(&ctx.fugue, hash);
+#endif
 
 	sph_shabal512_init(&ctx.shabal);
 	sph_shabal512(&ctx.shabal, (const void*) hash, 64);
diff --git a/algo/x22/x25x-4way.c b/algo/x22/x25x-4way.c
index 3e672af6..1cdea113 100644
--- a/algo/x22/x25x-4way.c
+++ b/algo/x22/x25x-4way.c
@@ -18,7 +18,7 @@
 #include "algo/shavite/sph_shavite.h"
 #include "algo/simd/nist.h"
 #include "algo/simd/simd-hash-2way.h"
-#include "algo/fugue/sph_fugue.h"
+#include "algo/fugue/fugue-aesni.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include "algo/tiger/sph_tiger.h"
 #include "algo/lyra2/lyra2.h"
@@ -72,7 +72,7 @@ union _x25x_8way_ctx_overlay
     cube_4way_context       cube;
     simd_4way_context       simd;
     hamsi512_8way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_8way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_8way_context     sha512;
@@ -303,30 +303,15 @@ int x25x_8way_hash( void *output, const void *input, int thrid )
    dintrlv_8x64_512( hash0[11], hash1[11], hash2[11], hash3[11],
                      hash4[11], hash5[11], hash6[11], hash7[11], vhash );
    
-	sph_fugue512_init(&ctx.fugue);
-	sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64);
-	sph_fugue512_close(&ctx.fugue, hash0[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash1[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash2[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash3[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash4[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash4[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash5[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash5[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash6[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash6[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash7[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash7[12]);
+   fugue512_full( &ctx.fugue, hash0[12], hash0[11], 64 );
+   fugue512_full( &ctx.fugue, hash1[12], hash1[11], 64 );
+   fugue512_full( &ctx.fugue, hash2[12], hash2[11], 64 );
+   fugue512_full( &ctx.fugue, hash3[12], hash3[11], 64 );
+   fugue512_full( &ctx.fugue, hash4[12], hash4[11], 64 );
+   fugue512_full( &ctx.fugue, hash5[12], hash5[11], 64 );
+   fugue512_full( &ctx.fugue, hash6[12], hash6[11], 64 );
+   fugue512_full( &ctx.fugue, hash7[12], hash7[11], 64 );
+
    intrlv_8x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12],
                            hash4[12], hash5[12], hash6[12], hash7[12] );
 
@@ -652,7 +637,7 @@ union _x25x_4way_ctx_overlay
     sph_shavite512_context  shavite;
     hashState_sd            simd;
     hamsi512_4way_context   hamsi;
-    sph_fugue512_context    fugue;
+    hashState_fugue         fugue;
     shabal512_4way_context  shabal;
     sph_whirlpool_context   whirlpool;
     sha512_4way_context     sha512;
@@ -758,18 +743,10 @@ int x25x_4way_hash( void *output, const void *input, int thrid )
    hamsi512_4way_close( &ctx.hamsi, vhash );
    dintrlv_4x64_512( hash0[11], hash1[11], hash2[11], hash3[11], vhash );
 
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash0[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash0[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash1[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash1[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash2[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash2[12]);
-   sph_fugue512_init(&ctx.fugue);
-   sph_fugue512(&ctx.fugue, (const void*) hash3[11], 64);
-   sph_fugue512_close(&ctx.fugue, hash3[12]);
+   fugue512_full( &ctx.fugue, hash0[12], hash0[11], 64 );
+   fugue512_full( &ctx.fugue, hash1[12], hash1[11], 64 );
+   fugue512_full( &ctx.fugue, hash2[12], hash2[11], 64 );
+   fugue512_full( &ctx.fugue, hash3[12], hash3[11], 64 );
 
    intrlv_4x32_512( vhash, hash0[12], hash1[12], hash2[12], hash3[12] );
 
diff --git a/algo/x22/x25x.c b/algo/x22/x25x.c
index b3438099..7855698d 100644
--- a/algo/x22/x25x.c
+++ b/algo/x22/x25x.c
@@ -7,9 +7,11 @@
 #if defined(__AES__)
   #include "algo/echo/aes_ni/hash_api.h"
   #include "algo/groestl/aes_ni/hash-groestl.h"
+  #include "algo/fugue/fugue-aesni.h"
 #else
   #include "algo/groestl/sph_groestl.h"
   #include "algo/echo/sph_echo.h"
+  #include "algo/fugue/sph_fugue.h"
 #endif
 #include "algo/skein/sph_skein.h"
 #include "algo/jh/sph_jh.h"
@@ -19,7 +21,6 @@
 #include "algo/shavite/sph_shavite.h"
 #include "algo/simd/nist.h"
 #include "algo/hamsi/sph_hamsi.h"
-#include "algo/fugue/sph_fugue.h"
 #include "algo/shabal/sph_shabal.h"
 #include "algo/whirlpool/sph_whirlpool.h"
 #include <openssl/sha.h>
@@ -39,9 +40,11 @@ union _x25x_context_overlay
 #if defined(__AES__)
         hashState_groestl       groestl;
         hashState_echo          echo;
+        hashState_fugue         fugue;
 #else
         sph_groestl512_context  groestl;
         sph_echo512_context     echo;
+        sph_fugue512_context    fugue;
 #endif
         sph_jh512_context       jh;
         sph_keccak512_context   keccak;
@@ -51,7 +54,6 @@ union _x25x_context_overlay
         sph_shavite512_context  shavite;
         hashState_sd            simd;
         sph_hamsi512_context    hamsi;
-        sph_fugue512_context    fugue;
         sph_shabal512_context   shabal;
         sph_whirlpool_context   whirlpool;
         SHA512_CTX              sha512;
@@ -133,9 +135,13 @@ int x25x_hash( void *output, const void *input, int thrid )
 	sph_hamsi512(&ctx.hamsi, (const void*) &hash[10], 64);
 	sph_hamsi512_close(&ctx.hamsi, &hash[11]);
 
+#if defined(__AES__)
+        fugue512_full( &ctx.fugue, &hash[12], &hash[11], 64 );
+#else
 	sph_fugue512_init(&ctx.fugue);
 	sph_fugue512(&ctx.fugue, (const void*) &hash[11], 64);
 	sph_fugue512_close(&ctx.fugue, &hash[12]);
+#endif
 
 	sph_shabal512_init(&ctx.shabal);
 	sph_shabal512(&ctx.shabal, (const void*) &hash[12], 64);
diff --git a/build-allarch.sh b/build-allarch.sh
index 9f82dd1e..8b022d02 100755
--- a/build-allarch.sh
+++ b/build-allarch.sh
@@ -51,23 +51,23 @@ mv cpuminer.exe cpuminer-aes-sse42.exe
 strip -s cpuminer
 mv cpuminer cpuminer-aes-sse42
 
-#make clean || echo clean
-#rm -f config.status
-#CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
-#make -j 8
-#strip -s cpuminer.exe
-#mv cpuminer.exe cpuminer-sse42.exe
-#strip -s cpuminer
-#mv cpuminer cpuminer-sse42
+make clean || echo clean
+rm -f config.status
+CFLAGS="-O3 -march=corei7 -Wall -fno-common" ./configure --with-curl
+make -j 8
+strip -s cpuminer.exe
+mv cpuminer.exe cpuminer-sse42.exe
+strip -s cpuminer
+mv cpuminer cpuminer-sse42
 
-#make clean || echo clean
-#rm -f config.status
-#CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
-#make -j 8
-#strip -s cpuminer.exe
-#mv cpuminer.exe cpuminer-ssse3.exe
-#strip -s cpuminer
-#mv cpuminer cpuminer-ssse3
+make clean || echo clean
+rm -f config.status
+CFLAGS="-O3 -march=core2 -Wall -fno-common" ./configure --with-curl
+make -j 8
+strip -s cpuminer.exe
+mv cpuminer.exe cpuminer-ssse3.exe
+strip -s cpuminer
+mv cpuminer cpuminer-ssse3
 
 make clean || echo clean
 rm -f config.status
diff --git a/clean-all.sh b/clean-all.sh
index aba2de3d..42aa3ffc 100755
--- a/clean-all.sh
+++ b/clean-all.sh
@@ -3,8 +3,8 @@
 # imake clean and rm all the targetted executables.
 # tips to users.
 
-rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen  > /dev/null
+rm cpuminer-avx512-sha-vaes cpuminer-avx512 cpuminer-avx2 cpuminer-avx cpuminer-aes-sse42 cpuminer-sse2 cpuminer-zen cpuminer-sse42 cpuminer-ssse3 > /dev/null
 
-rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe  > /dev/null
+rm cpuminer-avx512-sha-vaes.exe cpuminer-avx512.exe cpuminer-avx2.exe cpuminer-avx.exe cpuminer-aes-sse42.exe cpuminer-sse2.exe cpuminer-zen.exe cpuminer-sse42.exe cpuminer-ssse3.exe > /dev/null
 
 make distclean > /dev/null
diff --git a/configure b/configure
index d427b666..6bb6cb34 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.14.3.
+# Generated by GNU Autoconf 2.69 for cpuminer-opt 3.15.0.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -577,8 +577,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='cpuminer-opt'
 PACKAGE_TARNAME='cpuminer-opt'
-PACKAGE_VERSION='3.14.3'
-PACKAGE_STRING='cpuminer-opt 3.14.3'
+PACKAGE_VERSION='3.15.0'
+PACKAGE_STRING='cpuminer-opt 3.15.0'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
@@ -1332,7 +1332,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures cpuminer-opt 3.14.3 to adapt to many kinds of systems.
+\`configure' configures cpuminer-opt 3.15.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1404,7 +1404,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of cpuminer-opt 3.14.3:";;
+     short | recursive ) echo "Configuration of cpuminer-opt 3.15.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-cpuminer-opt configure 3.14.3
+cpuminer-opt configure 3.15.0
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2012,7 +2012,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by cpuminer-opt $as_me 3.14.3, which was
+It was created by cpuminer-opt $as_me 3.15.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2993,7 +2993,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='cpuminer-opt'
- VERSION='3.14.3'
+ VERSION='3.15.0'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -6690,7 +6690,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by cpuminer-opt $as_me 3.14.3, which was
+This file was extended by cpuminer-opt $as_me 3.15.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -6756,7 +6756,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-cpuminer-opt config.status 3.14.3
+cpuminer-opt config.status 3.15.0
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 9654d17f..69a0f573 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([cpuminer-opt], [3.14.3])
+AC_INIT([cpuminer-opt], [3.15.0])
 
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index 0cad187a..0b80c530 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -2330,8 +2330,8 @@ static void *miner_thread( void *userdata )
        // If unsubmiited nonce(s) found, submit now. 
        if ( unlikely( nonce_found && !opt_benchmark ) )
        {  
-          applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
-                               algo_names[ opt_algo ] );
+//          applog( LOG_WARNING, "BUG: See RELEASE_NOTES for reporting bugs. Algo = %s.",
+//                               algo_names[ opt_algo ] );
           if ( !submit_work( mythr, &work ) )
           {
              applog( LOG_WARNING, "Failed to submit share." );
@@ -2363,14 +2363,14 @@ static void *miner_thread( void *userdata )
 
        prev_hi_temp = hi_temp;
        curr_temp = cpu_temp(0);
-       timeval_subtract( &diff, &tv_end, &cpu_temp_time );
        if ( curr_temp > hi_temp ) hi_temp = curr_temp;
 
        pthread_mutex_unlock( &stats_lock );
 
        if ( !opt_quiet || ( curr_temp >= 80 ) )
        {
-          int wait_time = curr_temp >= 80 ? 30 : curr_temp >= 70 ? 60 : 120;
+          int wait_time = curr_temp >= 80 ? 20 : curr_temp >= 70 ? 60 : 120;
+          timeval_subtract( &diff, &tv_end, &cpu_temp_time );
           if ( ( diff.tv_sec > wait_time ) || ( curr_temp > prev_hi_temp ) )
           {
              char tempstr[32];
@@ -2747,7 +2747,10 @@ static void *stratum_thread(void *userdata )
             sleep(opt_fail_pause);
          }
          else
+         {
+            restart_threads();
             applog(LOG_BLUE,"Stratum connection established" );
+         }
       }
 
       report_summary_log( ( stratum_diff != stratum.job.diff )
diff --git a/sysinfos.c b/sysinfos.c
index 17aa69e4..c010a9af 100644
--- a/sysinfos.c
+++ b/sysinfos.c
@@ -1,4 +1,4 @@
-#if !defined(SYSINJFOS_C___)
+#if !defined(SYSINFOS_C__)
 #define SYSINFOS_C__
 
 /**
diff --git a/util.c b/util.c
index 14c7286a..0eee4282 100644
--- a/util.c
+++ b/util.c
@@ -1096,9 +1096,10 @@ bool fulltest( const uint32_t *hash, const uint32_t *target )
 // increases the effective precision. Due to the floating nature of the 
 // decimal point leading zeros aren't counted.
 //
-// Unfortunately I can't get float128 to work so long double it is.
+// Unfortunately I can't get float128 to work so long double (float80) is
+// as precise as it gets.
 // All calculations will be done using long double then converted to double.
-// This prevent introducing significant new error while taking advantage
+// This prevents introducing significant new error while taking advantage
 // of HW rounding.
 
 #if defined(GCC_INT128)
@@ -1107,7 +1108,8 @@ void diff_to_hash( uint32_t *target, const double diff )
 {
   uint128_t *targ = (uint128_t*)target;
   register long double m = 1. / diff;
-  targ[0] = 0;
+  // Fill targ[0] with ones rather than zero: -1 converts to the all-ones uint128_t.
+  targ[0] = -1;
   targ[1] = (uint128_t)( m * exp96 );
 }
 
@@ -1135,7 +1137,8 @@ void diff_to_hash( uint32_t *target, const double diff )
 {
   uint64_t *targ = (uint64_t*)target;
   register long double m = ( 1. / diff ) * exp32;
-  targ[1] = targ[0] = 0;
+  // Fill targ[1] and targ[0] with ones rather than zero: -1 converts to the all-ones uint64_t.
+  targ[1] = targ[0] = -1;
   targ[3] = (uint64_t)m;
   targ[2] = (uint64_t)( ( m - (long double)targ[3] ) * exp64 );
 }