From abf3e6cba82bf661128a2a0ac12e89fbda347659 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Wed, 12 Jun 2024 12:11:01 +0200
Subject: [PATCH 1/6] fix: key compression always

---
 src/concrete/ml/sklearn/base.py         | 1 +
 src/concrete/ml/sklearn/linear_model.py | 2 +-
 src/concrete/ml/torch/compile.py        | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 2acd87eb9..a8e2c94c3 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -582,6 +582,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
+            compress_eval_keys=True,
         )
 
         self._is_compiled = True
diff --git a/src/concrete/ml/sklearn/linear_model.py b/src/concrete/ml/sklearn/linear_model.py
index 341a57fd7..6e6cd861f 100644
--- a/src/concrete/ml/sklearn/linear_model.py
+++ b/src/concrete/ml/sklearn/linear_model.py
@@ -347,7 +347,7 @@ def _get_training_quantized_module(
         # Enable the underlying FHE circuit to be composed with itself
         # This feature is used in order to be able to iterate in the clear n times without having
         # to encrypt/decrypt the weight/bias values between each loop
-        configuration = Configuration(composable=True)
+        configuration = Configuration(composable=True, compress_evaluation_keys=True)
 
         composition_mapping = {0: 2, 1: 3}
 
diff --git a/src/concrete/ml/torch/compile.py b/src/concrete/ml/torch/compile.py
index 398e6a907..143bd17c8 100644
--- a/src/concrete/ml/torch/compile.py
+++ b/src/concrete/ml/torch/compile.py
@@ -254,6 +254,7 @@ def _compile_torch_or_onnx_model(
         global_p_error=global_p_error,
         verbose=verbose,
         inputs_encryption_status=inputs_encryption_status,
+        compress_eval_keys=True,
     )
 
     return quantized_module

From 3703f73e072a7c4d9a12ab5550d19f082cd1aff3 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Wed, 12 Jun 2024 13:20:29 +0200
Subject: [PATCH 2/6] fix: compress key always

---
 src/concrete/ml/quantization/quantized_module.py | 1 +
 src/concrete/ml/torch/compile.py                 | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/concrete/ml/quantization/quantized_module.py b/src/concrete/ml/quantization/quantized_module.py
index de9a071fa..2d83d33cc 100644
--- a/src/concrete/ml/quantization/quantized_module.py
+++ b/src/concrete/ml/quantization/quantized_module.py
@@ -889,6 +889,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
+            compress_eval_keys=True,
         )
 
         self._is_compiled = True
diff --git a/src/concrete/ml/torch/compile.py b/src/concrete/ml/torch/compile.py
index 143bd17c8..398e6a907 100644
--- a/src/concrete/ml/torch/compile.py
+++ b/src/concrete/ml/torch/compile.py
@@ -254,7 +254,6 @@ def _compile_torch_or_onnx_model(
         global_p_error=global_p_error,
         verbose=verbose,
         inputs_encryption_status=inputs_encryption_status,
-        compress_eval_keys=True,
     )
 
     return quantized_module

From d86666888d76673cc92b45b42e01f067f1780a14 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Wed, 12 Jun 2024 13:25:41 +0200
Subject: [PATCH 3/6] fix: compress key always

---
 conftest.py                              | 2 ++
 docker/release_resources/sanity_check.py | 4 ++++
 tests/torch/test_hybrid_converter.py     | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/conftest.py b/conftest.py
index edf9a2ee1..447dbaef1 100644
--- a/conftest.py
+++ b/conftest.py
@@ -155,6 +155,7 @@ def default_configuration():
         fhe_simulation=False,
         fhe_execution=True,
         compress_input_ciphertexts=os.environ.get("USE_INPUT_COMPRESSION", "1") == "1",
+        compress_eval_keys=True,
     )
 
 
@@ -173,6 +174,7 @@ def simulation_configuration():
         fhe_simulation=True,
         fhe_execution=False,
         compress_input_ciphertexts=os.environ.get("USE_INPUT_COMPRESSION", "1") == "1",
+        compress_eval_keys=True,
     )
 
 
diff --git a/docker/release_resources/sanity_check.py b/docker/release_resources/sanity_check.py
index abaa8902a..7a2f355c3 100644
--- a/docker/release_resources/sanity_check.py
+++ b/docker/release_resources/sanity_check.py
@@ -75,6 +75,8 @@ def ml_check(args, keyring_dir_as_str):
         enable_unsafe_features=True,
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
+        compress_input_ciphertexts=True,
+        compress_eval_keys=True,
     )
 
     # We first compile the model with some data, here the training set
@@ -120,6 +122,8 @@ def function_to_compile(x):
         enable_unsafe_features=is_fast,
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
+        compress_input_ciphertexts=True,
+        compress_eval_keys=True,
     )
 
     print("Compiling...")
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index b2ce6e07c..955427e76 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -40,6 +40,8 @@ def run_hybrid_llm_test(
     # Multi-parameter strategy is used in order to speed-up the FHE executions
     configuration = Configuration(
         single_precision=False,
+        compress_input_ciphertexts=True,
+        compress_eval_keys=True,
     )
 
     # Create a hybrid model

From cabc34b795e6d0d443c9cf35e87d8a6b4938fd43 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Wed, 12 Jun 2024 14:34:55 +0200
Subject: [PATCH 4/6] fix: fix wrong flag

---
 conftest.py                                      | 2 +-
 src/concrete/ml/quantization/quantized_module.py | 2 +-
 src/concrete/ml/sklearn/base.py                  | 2 +-
 tests/torch/test_hybrid_converter.py             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/conftest.py b/conftest.py
index 447dbaef1..fdbd14319 100644
--- a/conftest.py
+++ b/conftest.py
@@ -155,7 +155,7 @@ def default_configuration():
         fhe_simulation=False,
         fhe_execution=True,
         compress_input_ciphertexts=os.environ.get("USE_INPUT_COMPRESSION", "1") == "1",
-        compress_eval_keys=True,
+        compress_evaluation_keys=True,
     )
 
 
diff --git a/src/concrete/ml/quantization/quantized_module.py b/src/concrete/ml/quantization/quantized_module.py
index 2d83d33cc..60a399b00 100644
--- a/src/concrete/ml/quantization/quantized_module.py
+++ b/src/concrete/ml/quantization/quantized_module.py
@@ -889,7 +889,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
-            compress_eval_keys=True,
+            compress_evaluation_keys=True,
         )
 
         self._is_compiled = True
diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index a8e2c94c3..95bfe94b5 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -582,7 +582,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
-            compress_eval_keys=True,
+            compress_evaluation_keys=True,
         )
 
         self._is_compiled = True
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index 955427e76..e37e156ee 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -41,7 +41,7 @@ def run_hybrid_llm_test(
     configuration = Configuration(
         single_precision=False,
         compress_input_ciphertexts=True,
-        compress_eval_keys=True,
+        compress_evaluation_keys=True,
     )
 
     # Create a hybrid model

From ffbea40eda0c031215b7805abb431fbdc5dc7e54 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Wed, 12 Jun 2024 15:30:30 +0200
Subject: [PATCH 5/6] fix: use env variable

---
 conftest.py                                      | 2 --
 docker/release_resources/sanity_check.py         | 3 +--
 src/concrete/ml/common/utils.py                  | 5 +++++
 src/concrete/ml/quantization/quantized_module.py | 3 ++-
 src/concrete/ml/sklearn/base.py                  | 4 +++-
 tests/torch/test_hybrid_converter.py             | 1 -
 6 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/conftest.py b/conftest.py
index fdbd14319..edf9a2ee1 100644
--- a/conftest.py
+++ b/conftest.py
@@ -155,7 +155,6 @@ def default_configuration():
         fhe_simulation=False,
         fhe_execution=True,
         compress_input_ciphertexts=os.environ.get("USE_INPUT_COMPRESSION", "1") == "1",
-        compress_evaluation_keys=True,
     )
 
 
@@ -174,7 +173,6 @@ def simulation_configuration():
         fhe_simulation=True,
         fhe_execution=False,
         compress_input_ciphertexts=os.environ.get("USE_INPUT_COMPRESSION", "1") == "1",
-        compress_eval_keys=True,
     )
 
 
diff --git a/docker/release_resources/sanity_check.py b/docker/release_resources/sanity_check.py
index 7a2f355c3..535437584 100644
--- a/docker/release_resources/sanity_check.py
+++ b/docker/release_resources/sanity_check.py
@@ -76,7 +76,6 @@ def ml_check(args, keyring_dir_as_str):
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
         compress_input_ciphertexts=True,
-        compress_eval_keys=True,
     )
 
     # We first compile the model with some data, here the training set
@@ -123,7 +122,7 @@ def function_to_compile(x):
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
         compress_input_ciphertexts=True,
-        compress_eval_keys=True,
+        compress_evaluation_keys=True,
     )
 
     print("Compiling...")
diff --git a/src/concrete/ml/common/utils.py b/src/concrete/ml/common/utils.py
index 903ce214f..484676602 100644
--- a/src/concrete/ml/common/utils.py
+++ b/src/concrete/ml/common/utils.py
@@ -53,6 +53,11 @@
 # However, for internal testing purposes, we retain the capability to disable this feature
 os.environ["USE_INPUT_COMPRESSION"] = os.environ.get("USE_INPUT_COMPRESSION", "1")
 
+# Enable PBS evaluation key compression (~4x size reduction)
+# Note: This setting is fixed and cannot be altered by users
+# However, for internal testing purposes, we retain the capability to disable this feature
+os.environ["USE_KEY_COMPRESSION"] = os.environ.get("USE_KEY_COMPRESSION", "1")
+
 
 class FheMode(str, enum.Enum):
     """Enum representing the execution mode.
diff --git a/src/concrete/ml/quantization/quantized_module.py b/src/concrete/ml/quantization/quantized_module.py
index 60a399b00..41716c51f 100644
--- a/src/concrete/ml/quantization/quantized_module.py
+++ b/src/concrete/ml/quantization/quantized_module.py
@@ -876,6 +876,7 @@ def compile(
 
         # Enable input ciphertext compression
         enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
+        enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"
 
         self.fhe_circuit = compiler.compile(
             inputset,
@@ -889,7 +890,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
-            compress_evaluation_keys=True,
+            compress_evaluation_keys=enable_key_compression,
         )
 
         self._is_compiled = True
diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 95bfe94b5..92916011c 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -569,6 +569,8 @@ def compile(
 
         # Enable input ciphertext compression
         enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
+        # Enable evaluation key compression
+        enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"
 
         self.fhe_circuit_ = module_to_compile.compile(
             inputset,
@@ -582,7 +584,7 @@ def compile(
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
-            compress_evaluation_keys=True,
+            compress_evaluation_keys=enable_key_compression,
         )
 
         self._is_compiled = True
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index e37e156ee..7becb76cc 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -41,7 +41,6 @@ def run_hybrid_llm_test(
     configuration = Configuration(
         single_precision=False,
         compress_input_ciphertexts=True,
-        compress_evaluation_keys=True,
     )
 
     # Create a hybrid model

From 3a8dca0b63fd02e5f44bc442e433bc97bb503fc7 Mon Sep 17 00:00:00 2001
From: Andrei Stoian <andrei.stoian@zama.ai>
Date: Thu, 13 Jun 2024 09:45:50 +0200
Subject: [PATCH 6/6] fix: add test without key compression

---
 tests/deployment/test_client_server.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
index c14e64aea..dfd4bc821 100644
--- a/tests/deployment/test_client_server.py
+++ b/tests/deployment/test_client_server.py
@@ -119,10 +119,25 @@ def test_client_server_sklearn_inference(
     max_bit_width = fhe_circuit.graph.maximum_integer_bit_width()
     print(f"Max width {max_bit_width}")
 
-    # Compare the FHE predictions with the clear ones. Simulated predictions are not considered in
-    # this test.
+    # Check that key compression is enabled
+    assert os.environ.get("USE_KEY_COMPRESSION") == "1", "'USE_KEY_COMPRESSION' is not enabled"
+
+    # Check with key compression
     check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)
 
+    # Check without key compression
+    with pytest.MonkeyPatch.context() as mp_context:
+
+        # Disable evaluation key compression
+        mp_context.setenv("USE_KEY_COMPRESSION", "0")
+
+        # Check that evaluation key compression is disabled
+        assert os.environ.get("USE_KEY_COMPRESSION") == "0", "'USE_KEY_COMPRESSION' is not disabled"
+
+        # Compare the FHE predictions with the clear ones. Simulated predictions are not
+        # considered in this test.
+        check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)
+
     # Check client/server FHE predictions vs the FHE predictions of the dev model
     check_client_server_inference(
         x_test, model, key_dir, check_array_equal, check_float_array_equal