fix: always use evaluation key compression (#726)
andrei-stoian-zama authored Jun 13, 2024
1 parent 06075bd commit b4e1060
Showing 7 changed files with 32 additions and 3 deletions.
3 changes: 3 additions & 0 deletions docker/release_resources/sanity_check.py
@@ -75,6 +75,7 @@ def ml_check(args, keyring_dir_as_str):
enable_unsafe_features=True,
use_insecure_key_cache=is_fast,
insecure_key_cache_location=keyring_dir_as_str,
compress_input_ciphertexts=True,
)

# We first compile the model with some data, here the training set
@@ -120,6 +121,8 @@ def function_to_compile(x):
enable_unsafe_features=is_fast,
use_insecure_key_cache=is_fast,
insecure_key_cache_location=keyring_dir_as_str,
compress_input_ciphertexts=True,
compress_evaluation_keys=True,
)

print("Compiling...")
5 changes: 5 additions & 0 deletions src/concrete/ml/common/utils.py
@@ -53,6 +53,11 @@
# However, for internal testing purposes, we retain the capability to disable this feature
os.environ["USE_INPUT_COMPRESSION"] = os.environ.get("USE_INPUT_COMPRESSION", "1")

# Enable PBS evaluation key compression (~4x size reduction)
# Note: This setting is fixed and cannot be altered by users
# However, for internal testing purposes, we retain the capability to disable this feature
os.environ["USE_KEY_COMPRESSION"] = os.environ.get("USE_KEY_COMPRESSION", "1")


class FheMode(str, enum.Enum):
"""Enum representing the execution mode.
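A minimal sketch (illustrative, not part of this commit) of the default-setting pattern used in utils.py above: the assignment keeps any value already exported in the environment and only fills in "1" when the variable is unset, which is what lets internal tests export USE_KEY_COMPRESSION=0 before Concrete ML reads the flag.

import os

# Unset -> the default "1" is applied
os.environ.pop("USE_KEY_COMPRESSION", None)
os.environ["USE_KEY_COMPRESSION"] = os.environ.get("USE_KEY_COMPRESSION", "1")
assert os.environ["USE_KEY_COMPRESSION"] == "1"

# Already exported (e.g. by an internal test) -> the existing value is kept
os.environ["USE_KEY_COMPRESSION"] = "0"
os.environ["USE_KEY_COMPRESSION"] = os.environ.get("USE_KEY_COMPRESSION", "1")
assert os.environ["USE_KEY_COMPRESSION"] == "0"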
2 changes: 2 additions & 0 deletions src/concrete/ml/quantization/quantized_module.py
@@ -876,6 +876,7 @@ def compile(

# Enable input ciphertext compression
enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"

self.fhe_circuit = compiler.compile(
inputset,
@@ -889,6 +890,7 @@
fhe_simulation=False,
fhe_execution=True,
compress_input_ciphertexts=enable_input_compression,
compress_evaluation_keys=enable_key_compression,
)

self._is_compiled = True
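A minimal sketch of the same pattern outside Concrete ML, assuming Concrete Python's fhe.compiler decorator and its compile() configuration keywords (this snippet is not taken from the repository): the environment flags are read at compile time and forwarded as plain configuration options, exactly as QuantizedModule.compile does above.

import os

from concrete import fhe

@fhe.compiler({"x": "encrypted"})
def double(x):
    return x + x

# Mirror the flags read in QuantizedModule.compile
enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"

circuit = double.compile(
    range(16),
    compress_input_ciphertexts=enable_input_compression,
    compress_evaluation_keys=enable_key_compression,
)
circuit.keygen()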
3 changes: 3 additions & 0 deletions src/concrete/ml/sklearn/base.py
@@ -569,6 +569,8 @@ def compile(

# Enable input ciphertext compression
enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
# Enable evaluation key compression
enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"

self.fhe_circuit_ = module_to_compile.compile(
inputset,
@@ -582,6 +584,7 @@
fhe_simulation=False,
fhe_execution=True,
compress_input_ciphertexts=enable_input_compression,
compress_evaluation_keys=enable_key_compression,
)

self._is_compiled = True
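A hedged usage sketch (the model choice and data are illustrative, not from this commit): with the defaults above, a built-in model compiled through the sklearn base compile path picks up both compression flags without the user passing anything.

import numpy

from concrete.ml.sklearn import LogisticRegression

X = numpy.random.uniform(size=(100, 4))
y = (X[:, 0] > 0.5).astype(numpy.int64)

model = LogisticRegression(n_bits=8)
model.fit(X, y)

# USE_INPUT_COMPRESSION and USE_KEY_COMPRESSION both default to "1", so the
# compiled circuit uses compressed input ciphertexts and evaluation keys
model.compile(X)
predictions = model.predict(X[:5], fhe="execute")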
2 changes: 1 addition & 1 deletion src/concrete/ml/sklearn/linear_model.py
@@ -347,7 +347,7 @@ def _get_training_quantized_module(
# Enable the underlying FHE circuit to be composed with itself
# This feature is used in order to be able to iterate in the clear n times without having
# to encrypt/decrypt the weight/bias values between each loop
configuration = Configuration(composable=True)
configuration = Configuration(composable=True, compress_evaluation_keys=True)

composition_mapping = {0: 2, 1: 3}

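A minimal sketch, assuming Concrete Python's Configuration API: composability and evaluation-key compression are independent options on the same object, which is why extending the one-liner above is enough for the training circuit.

from concrete.fhe import Configuration

# Same object as above: composition and key compression do not interact
configuration = Configuration(composable=True, compress_evaluation_keys=True)
assert configuration.composable and configuration.compress_evaluation_keys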
19 changes: 17 additions & 2 deletions tests/deployment/test_client_server.py
@@ -119,10 +119,25 @@ def test_client_server_sklearn_inference(
max_bit_width = fhe_circuit.graph.maximum_integer_bit_width()
print(f"Max width {max_bit_width}")

# Compare the FHE predictions with the clear ones. Simulated predictions are not considered in
# this test.
# Check that key compression is enabled
assert os.environ.get("USE_KEY_COMPRESSION") == "1", "'USE_KEY_COMPRESSION' is not enabled"

# Check with key compression
check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)

# Check without key compression
with pytest.MonkeyPatch.context() as mp_context:

# Disable evaluation key compression
mp_context.setenv("USE_KEY_COMPRESSION", "0")

# Check that evaluation key compression is disabled
assert os.environ.get("USE_KEY_COMPRESSION") == "0", "'USE_KEY_COMPRESSION' is not disabled"

# Compare the FHE predictions with the clear ones. Simulated predictions are not
# considered in this test.
check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)

# Check client/server FHE predictions vs the FHE predictions of the dev model
check_client_server_inference(
x_test, model, key_dir, check_array_equal, check_float_array_equal
1 change: 1 addition & 0 deletions tests/torch/test_hybrid_converter.py
@@ -40,6 +40,7 @@ def run_hybrid_llm_test(
# Multi-parameter strategy is used in order to speed-up the FHE executions
configuration = Configuration(
single_precision=False,
compress_input_ciphertexts=True,
)

# Create a hybrid model
