diff --git a/docker/release_resources/sanity_check.py b/docker/release_resources/sanity_check.py
index abaa8902a..535437584 100644
--- a/docker/release_resources/sanity_check.py
+++ b/docker/release_resources/sanity_check.py
@@ -75,6 +75,7 @@ def ml_check(args, keyring_dir_as_str):
         enable_unsafe_features=True,
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
+        compress_input_ciphertexts=True,
     )

     # We first compile the model with some data, here the training set
@@ -120,6 +121,8 @@ def function_to_compile(x):
         enable_unsafe_features=is_fast,
         use_insecure_key_cache=is_fast,
         insecure_key_cache_location=keyring_dir_as_str,
+        compress_input_ciphertexts=True,
+        compress_evaluation_keys=True,
     )

     print("Compiling...")
diff --git a/src/concrete/ml/common/utils.py b/src/concrete/ml/common/utils.py
index 903ce214f..484676602 100644
--- a/src/concrete/ml/common/utils.py
+++ b/src/concrete/ml/common/utils.py
@@ -53,6 +53,11 @@
 # However, for internal testing purposes, we retain the capability to disable this feature
 os.environ["USE_INPUT_COMPRESSION"] = os.environ.get("USE_INPUT_COMPRESSION", "1")

+# Enable PBS evaluation key compression (~4x size reduction)
+# Note: This setting is fixed and cannot be altered by users
+# However, for internal testing purposes, we retain the capability to disable this feature
+os.environ["USE_KEY_COMPRESSION"] = os.environ.get("USE_KEY_COMPRESSION", "1")
+

 class FheMode(str, enum.Enum):
     """Enum representing the execution mode.
diff --git a/src/concrete/ml/quantization/quantized_module.py b/src/concrete/ml/quantization/quantized_module.py
index de9a071fa..41716c51f 100644
--- a/src/concrete/ml/quantization/quantized_module.py
+++ b/src/concrete/ml/quantization/quantized_module.py
@@ -876,6 +876,7 @@ def compile(

         # Enable input ciphertext compression
         enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
+        enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"

         self.fhe_circuit = compiler.compile(
             inputset,
@@ -889,6 +890,7 @@
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
+            compress_evaluation_keys=enable_key_compression,
         )

         self._is_compiled = True
diff --git a/src/concrete/ml/sklearn/base.py b/src/concrete/ml/sklearn/base.py
index 2acd87eb9..92916011c 100644
--- a/src/concrete/ml/sklearn/base.py
+++ b/src/concrete/ml/sklearn/base.py
@@ -569,6 +569,8 @@ def compile(

         # Enable input ciphertext compression
         enable_input_compression = os.environ.get("USE_INPUT_COMPRESSION", "1") == "1"
+        # Enable evaluation key compression
+        enable_key_compression = os.environ.get("USE_KEY_COMPRESSION", "1") == "1"

         self.fhe_circuit_ = module_to_compile.compile(
             inputset,
@@ -582,6 +584,7 @@
             fhe_simulation=False,
             fhe_execution=True,
             compress_input_ciphertexts=enable_input_compression,
+            compress_evaluation_keys=enable_key_compression,
         )

         self._is_compiled = True
diff --git a/src/concrete/ml/sklearn/linear_model.py b/src/concrete/ml/sklearn/linear_model.py
index 341a57fd7..6e6cd861f 100644
--- a/src/concrete/ml/sklearn/linear_model.py
+++ b/src/concrete/ml/sklearn/linear_model.py
@@ -347,7 +347,7 @@ def _get_training_quantized_module(
         # Enable the underlying FHE circuit to be composed with itself
         # This feature is used in order to be able to iterate in the clear n times without having
         # to encrypt/decrypt the weight/bias values between each loop
-        configuration = Configuration(composable=True)
+        configuration = Configuration(composable=True, compress_evaluation_keys=True)

         composition_mapping = {0: 2, 1: 3}

diff --git a/tests/deployment/test_client_server.py b/tests/deployment/test_client_server.py
index c14e64aea..dfd4bc821 100644
--- a/tests/deployment/test_client_server.py
+++ b/tests/deployment/test_client_server.py
@@ -119,10 +119,25 @@ def test_client_server_sklearn_inference(
     max_bit_width = fhe_circuit.graph.maximum_integer_bit_width()
     print(f"Max width {max_bit_width}")

-    # Compare the FHE predictions with the clear ones. Simulated predictions are not considered in
-    # this test.
+    # Check that key compression is enabled
+    assert os.environ.get("USE_KEY_COMPRESSION") == "1", "'USE_KEY_COMPRESSION' is not enabled"
+
+    # Check with key compression
     check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)

+    # Check without key compression
+    with pytest.MonkeyPatch.context() as mp_context:
+
+        # Disable evaluation key compression
+        mp_context.setenv("USE_KEY_COMPRESSION", "0")
+
+        # Check that evaluation key compression is disabled
+        assert os.environ.get("USE_KEY_COMPRESSION") == "0", "'USE_KEY_COMPRESSION' is not disabled"
+
+        # Compare the FHE predictions with the clear ones. Simulated predictions are not
+        # considered in this test.
+        check_is_good_execution_for_cml_vs_circuit(x_test, model, simulate=False, n_allowed_runs=1)
+
     # Check client/server FHE predictions vs the FHE predictions of the dev model
     check_client_server_inference(
         x_test, model, key_dir, check_array_equal, check_float_array_equal
diff --git a/tests/torch/test_hybrid_converter.py b/tests/torch/test_hybrid_converter.py
index b2ce6e07c..7becb76cc 100644
--- a/tests/torch/test_hybrid_converter.py
+++ b/tests/torch/test_hybrid_converter.py
@@ -40,6 +40,7 @@ def run_hybrid_llm_test(
     # Multi-parameter strategy is used in order to speed-up the FHE executions
     configuration = Configuration(
         single_precision=False,
+        compress_input_ciphertexts=True,
     )

     # Create a hybrid model
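
A minimal usage sketch of the toggle introduced above, assuming concrete-ml is installed: USE_KEY_COMPRESSION mirrors USE_INPUT_COMPRESSION, defaults to "1", and is read at compile time, so it can be flipped per process for internal testing. The toy dataset and default LogisticRegression hyper-parameters below are illustrative assumptions, not part of this change.

    import os

    import numpy
    from concrete.ml.sklearn import LogisticRegression

    # Internal-testing scenario: disable evaluation key compression for this
    # process only. Leaving the variable unset keeps the default of "1".
    os.environ["USE_KEY_COMPRESSION"] = "0"

    # Toy data, only to make the sketch self-contained
    x = numpy.random.uniform(size=(100, 4))
    y = (x.sum(axis=1) > 2).astype(int)

    model = LogisticRegression()
    model.fit(x, y)

    # compile() reads USE_KEY_COMPRESSION and forwards the resulting boolean
    # to the Concrete compiler as compress_evaluation_keys
    model.compile(x)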