Skip to content

Commit

Permalink
Performance improvements compared to previous release: code works 15-…
Browse files Browse the repository at this point in the history
…35% faster.

- Using COMPUTE_HASH2 path in Asm code, it wass polished and now always enabled.
- Ported this code to C.
  • Loading branch information
gildor2 committed Feb 22, 2017
1 parent 6980f71 commit ab56f93
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 31 deletions.
24 changes: 22 additions & 2 deletions Sources/match.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ local uInt longest_match(s, cur_match)
*/
IPos pos, next_pos;
register int i;
register uInt hash;
Bytef* scan_end;

/* go back to offset 0 */
cur_match -= offset;
Expand All @@ -220,15 +222,33 @@ local uInt longest_match(s, cur_match)
offset = i;
}
}
/* switch cur_match to next_pos chain */
/* Switch cur_match to next_pos chain */
cur_match = next_pos;

/* Try hash head at len-(MIN_MATCH-1) position to see if we could get
* a better cur_match at the end of string. Using (MIN_MATCH-1) lets
* us to include one more byte into hash - the byte which will be checked
* in main loop now, and which allows to grow match by 1.
*/
hash = 0;
scan_end = scan + len - MIN_MATCH + 1;
UPDATE_HASH(s, hash, scan_end[0]);
UPDATE_HASH(s, hash, scan_end[1]);
UPDATE_HASH(s, hash, scan_end[2]);
pos = s->head[hash];
if (pos < cur_match) {
offset = len - MIN_MATCH + 1;
if (pos <= limit_base + offset) goto break_matching;
cur_match = pos;
}

/* update offset-dependent vars */
limit = limit_base + offset;
match_base = s->window - offset;
UPDATE_MATCH_BASE2;
continue;
} else {
/* no way to change offset - simply update match_base2 for
/* There's no way to change offset - simply update match_base2 for
* new best_len (this is similar to what original algorithm does)
*/
UPDATE_MATCH_BASE2;
Expand Down
52 changes: 23 additions & 29 deletions Sources/match32.asm
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@

; Configuration (do not change unless for testing)

;%define COMPUTE_HASH2 ; useless - same ratio/speed
;%define REFINE_MATCHES ; this option produces slightly different compression results - sometimes better, sometimes worse
;%define ALWAYS_ZERO_OFFSET ; DEBUG: work just like original algorithm, with zero offset
;%define ALWAYS_ZERO_OFFSET ; DEBUG: work just like original algorithm, with zero offset; the performance will be nearly equal
; to original zlib with asm optimization

; Debugging options

Expand Down Expand Up @@ -637,7 +637,6 @@ _longest_match:
%ifdef ALWAYS_ZERO_OFFSET
jmp .continue
%endif
;?? goto COMPUTE_HASH2 instead of .continue
; if (len <= MIN_MATCH ...
cmp eax,MIN_MATCH
jle .continue
Expand Down Expand Up @@ -689,19 +688,23 @@ _longest_match:
movzx ebx,word [edi+ebp*2]
cmp ebx,ecx
jbe .break_match ; one of chains either too far, or NIL
inc ecx ; update limit: limit = limit_base+i
cmp ebx,edx
jnc .scan_match_loop ; current chain is less distant than remembered
mov edx,ebx
sub ebp,eax ; offset = EBP-old_match
mov [offset],ebp ; NOTE: should mask offset with "wmask" later
add ebp,eax ; return EBP, add EAX back
add ebp,eax ; revert EBP value (add EAX back)
jmp .scan_match_loop

.scan_match_end:
movzx esi,si ; at this point ESI.H == 0xFFFF -- reset it
%ifdef COMPUTE_HASH2
; Try to check verify hash heads to see if they points to longer distance than we have now
; Here: EDX=next_pos, ESI=wmask, EDI=prev
; Try to check verify hash head at the end of current string, including one more byte,
; to see if it points to longer distance than we have now.
; Here:
; EDX = next_pos
; ESI = wmask
; EDI = prev
mov ebp,[scan]
add ebp,[best_len]
mov ecx,[hash_shift] ; ECX = hash_shift
Expand All @@ -710,36 +713,27 @@ _longest_match:
xor al,[ebp-MIN_MATCH+2]
shl eax,cl
xor al,[ebp-MIN_MATCH+3]
mov ecx,[limit_base] ; ECX = limit
and eax,[hash_mask]
; EAX = hash
mov ebp,[hash_heads]
; check head[hash]
movzx eax,word [ebp+eax*2]
cmp eax,ecx ; limit
jbe .break_match
cmp eax,edx
jb .comp_hash_found
; check hash chains
%rep 0 ; if enable this, can skip 1st match
and eax,esi ; &= wmask
movzx eax,word [edi+eax*2] ; prev[EAX]
cmp eax,ecx ; limit
jbe .break_match
cmp eax,edx
jb .comp_hash_found
%endrep
jmp .comp_hash_skip
.comp_hash_found:
mov edx,eax
mov eax,[best_len]
movzx ebx,word [ebp+eax*2] ; EBX = hash_heads[hash]
mov eax,[best_len] ; compute offset to EAX
sub eax,MIN_MATCH-1
mov ecx,[limit_base] ; ECX = limit_base + offset
add ecx,eax
cmp ebx,ecx ; check limit
jbe .break_match
cmp ebx,edx
jae .comp_hash_skip ; this is not a better match
mov edx,ebx
jmp .set_offset
.comp_hash_skip:
%endif ; COMPUTE_HASH2
mov eax,[offset] ; EAX = offset
and eax,esi ; offset &= wmask
.set_offset:
and eax,esi ; offset &= wmask
; EAX = offset
; EDX = new match
mov [offset],eax
mov ebx,[old_offset]
sub ebx,eax ; EBX = old_offset-offset
Expand All @@ -757,7 +751,7 @@ _longest_match:

;------------------------------------------------------------------------------

; This is a main magic line in this file, please DO NOT REMOVE! ;-)
; Please do not remove this string!
db 13,10,' Fast match finder for zlib, http://www.gildor.org/en/projects/zlib ',13,10,0

;------------------------------------------------------------------------------
Expand Down

0 comments on commit ab56f93

Please sign in to comment.