From 138c1075435e6e1f40e21d0eb6d4133ac7f28841 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Fri, 22 Feb 2019 10:48:32 +0100 Subject: [PATCH] Discard insufficient partial bZip2 matches - Use the ratio that was used before for partial zLib matches - Related: #95 --- precomp.cpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/precomp.cpp b/precomp.cpp index 0668fe2..b73916b 100644 --- a/precomp.cpp +++ b/precomp.cpp @@ -5239,14 +5239,31 @@ void try_recompress_bzip2(FILE* origfile, int level, long long& compressed_strea } } - best_identical_bytes_decomp = identical_bytes_decomp; - best_identical_bytes = identical_bytes; - if (penalty_bytes_len > 0) { - memcpy(best_penalty_bytes, penalty_bytes, penalty_bytes_len); - best_penalty_bytes_len = penalty_bytes_len; - } else { - best_penalty_bytes_len = 0; - } + // Partial matches sometimes need all the decompressed bytes, but there are far fewer + // identical recompressed bytes - in these cases, all the decompressed bytes have to + // be stored together with the remaining recompressed bytes, so the result won't compress + // better than the original stream. What's important here is the decompressed ratio divided by the + // recompressed ratio, which shouldn't get too high. + // Example: A stream has 5 of 1000 identical recompressed bytes, but needs 1000 of 1000 decompressed bytes, + // so the ratio is (1000/1000)/(5/1000) = 200 which is too high. With 5 of 1000 decompressed bytes or + // 1000 of 1000 identical recompressed bytes, the ratio would've been 1 and we'd accept it. 
+ + float partial_ratio = ((float)identical_bytes_decomp / decomp_bytes_total) / ((float)identical_bytes / compressed_stream_size); + if (partial_ratio < 3.0f) { + best_identical_bytes_decomp = identical_bytes_decomp; + best_identical_bytes = identical_bytes; + if (penalty_bytes_len > 0) { + memcpy(best_penalty_bytes, penalty_bytes, penalty_bytes_len); + best_penalty_bytes_len = penalty_bytes_len; + } + else { + best_penalty_bytes_len = 0; + } + } else { + if (DEBUG_MODE) { + printf("Not enough identical recompressed bytes\n"); + } + } } } }