From ab56f93778ed12ab6d14bd9114d9f6ad4d79a0c3 Mon Sep 17 00:00:00 2001 From: Konstantin Nosov Date: Wed, 22 Feb 2017 12:59:06 +0300 Subject: [PATCH] Performance improvements compared to previous release: code works 15-35% faster. - Using COMPUTE_HASH2 path in Asm code, it wass polished and now always enabled. - Ported this code to C. --- Sources/match.h | 24 +++++++++++++++++++-- Sources/match32.asm | 52 ++++++++++++++++++++------------------------- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/Sources/match.h b/Sources/match.h index 1410d32..609222f 100644 --- a/Sources/match.h +++ b/Sources/match.h @@ -206,6 +206,8 @@ local uInt longest_match(s, cur_match) */ IPos pos, next_pos; register int i; + register uInt hash; + Bytef* scan_end; /* go back to offset 0 */ cur_match -= offset; @@ -220,15 +222,33 @@ local uInt longest_match(s, cur_match) offset = i; } } - /* switch cur_match to next_pos chain */ + /* Switch cur_match to next_pos chain */ cur_match = next_pos; + + /* Try hash head at len-(MIN_MATCH-1) position to see if we could get + * a better cur_match at the end of string. Using (MIN_MATCH-1) lets + * us to include one more byte into hash - the byte which will be checked + * in main loop now, and which allows to grow match by 1. + */ + hash = 0; + scan_end = scan + len - MIN_MATCH + 1; + UPDATE_HASH(s, hash, scan_end[0]); + UPDATE_HASH(s, hash, scan_end[1]); + UPDATE_HASH(s, hash, scan_end[2]); + pos = s->head[hash]; + if (pos < cur_match) { + offset = len - MIN_MATCH + 1; + if (pos <= limit_base + offset) goto break_matching; + cur_match = pos; + } + /* update offset-dependent vars */ limit = limit_base + offset; match_base = s->window - offset; UPDATE_MATCH_BASE2; continue; } else { - /* no way to change offset - simply update match_base2 for + /* There's no way to change offset - simply update match_base2 for * new best_len (this is similar to what original algorithm does) */ UPDATE_MATCH_BASE2; diff --git a/Sources/match32.asm b/Sources/match32.asm index 2ae2724..070f010 100644 --- a/Sources/match32.asm +++ b/Sources/match32.asm @@ -31,9 +31,9 @@ ; Configuration (do not change unless for testing) -;%define COMPUTE_HASH2 ; useless - same ratio/speed ;%define REFINE_MATCHES ; this option produces slightly different compression results - sometimes better, sometimes worse -;%define ALWAYS_ZERO_OFFSET ; DEBUG: work just like original algorithm, with zero offset +;%define ALWAYS_ZERO_OFFSET ; DEBUG: work just like original algorithm, with zero offset; the performance will be nearly equal + ; to original zlib with asm optimization ; Debugging options @@ -637,7 +637,6 @@ _longest_match: %ifdef ALWAYS_ZERO_OFFSET jmp .continue %endif - ;?? goto COMPUTE_HASH2 instead of .continue ; if (len <= MIN_MATCH ... cmp eax,MIN_MATCH jle .continue @@ -689,19 +688,23 @@ _longest_match: movzx ebx,word [edi+ebp*2] cmp ebx,ecx jbe .break_match ; one of chains either too far, or NIL + inc ecx ; update limit: limit = limit_base+i cmp ebx,edx jnc .scan_match_loop ; current chain is less distant than remembered mov edx,ebx sub ebp,eax ; offset = EBP-old_match mov [offset],ebp ; NOTE: should mask offset with "wmask" later - add ebp,eax ; return EBP, add EAX back + add ebp,eax ; revert EBP value (add EAX back) jmp .scan_match_loop .scan_match_end: movzx esi,si ; at this point ESI.H == 0xFFFF -- reset it -%ifdef COMPUTE_HASH2 - ; Try to check verify hash heads to see if they points to longer distance than we have now - ; Here: EDX=next_pos, ESI=wmask, EDI=prev + ; Try to check verify hash head at the end of current string, including one more byte, + ; to see if it points to longer distance than we have now. + ; Here: + ; EDX = next_pos + ; ESI = wmask + ; EDI = prev mov ebp,[scan] add ebp,[best_len] mov ecx,[hash_shift] ; ECX = hash_shift @@ -710,36 +713,27 @@ _longest_match: xor al,[ebp-MIN_MATCH+2] shl eax,cl xor al,[ebp-MIN_MATCH+3] - mov ecx,[limit_base] ; ECX = limit and eax,[hash_mask] ; EAX = hash mov ebp,[hash_heads] ; check head[hash] - movzx eax,word [ebp+eax*2] - cmp eax,ecx ; limit - jbe .break_match - cmp eax,edx - jb .comp_hash_found - ; check hash chains -%rep 0 ; if enable this, can skip 1st match - and eax,esi ; &= wmask - movzx eax,word [edi+eax*2] ; prev[EAX] - cmp eax,ecx ; limit - jbe .break_match - cmp eax,edx - jb .comp_hash_found -%endrep - jmp .comp_hash_skip -.comp_hash_found: - mov edx,eax - mov eax,[best_len] + movzx ebx,word [ebp+eax*2] ; EBX = hash_heads[hash] + mov eax,[best_len] ; compute offset to EAX sub eax,MIN_MATCH-1 + mov ecx,[limit_base] ; ECX = limit_base + offset + add ecx,eax + cmp ebx,ecx ; check limit + jbe .break_match + cmp ebx,edx + jae .comp_hash_skip ; this is not a better match + mov edx,ebx jmp .set_offset .comp_hash_skip: -%endif ; COMPUTE_HASH2 mov eax,[offset] ; EAX = offset - and eax,esi ; offset &= wmask .set_offset: + and eax,esi ; offset &= wmask + ; EAX = offset + ; EDX = new match mov [offset],eax mov ebx,[old_offset] sub ebx,eax ; EBX = old_offset-offset @@ -757,7 +751,7 @@ _longest_match: ;------------------------------------------------------------------------------ - ; This is a main magic line in this file, please DO NOT REMOVE! ;-) + ; Please do not remove this string! db 13,10,' Fast match finder for zlib, http://www.gildor.org/en/projects/zlib ',13,10,0 ;------------------------------------------------------------------------------