From 67966b47e601006561b25c95bb0359c134031b54 Mon Sep 17 00:00:00 2001
From: Dhruv Tiwari
Date: Sat, 13 Jul 2024 16:42:36 +0000
Subject: [PATCH] feature: quantization added

---
 interleaved_generation.py | 7 ++++++-
 text2image.py             | 8 ++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/interleaved_generation.py b/interleaved_generation.py
index dc17ccb..16dea40 100644
--- a/interleaved_generation.py
+++ b/interleaved_generation.py
@@ -62,12 +62,17 @@ def split_token_sequence(
 def main(args: argparse.Namespace):
     """Main function to generate and process model output."""
     # Load Chameleon model
-    model = ChameleonInferenceModel(
+    unquantized_model = ChameleonInferenceModel(
         MODEL_7B_PATH.as_posix(),
         TOKENIZER_TEXT_PATH.as_posix(),
         TOKENIZER_IMAGE_CFG_PATH.as_posix(),
         TOKENIZER_IMAGE_PATH.as_posix(),
     )
+    model = torch.quantization.quantize_dynamic(
+        unquantized_model,  # The model to be quantized
+        {torch.nn.Linear, torch.nn.LSTM},  # Layers to be dynamically quantized
+        dtype=torch.qint8  # Data type for quantization
+    )
     # Print model configuration
     print(f"Model path: {MODEL_7B_PATH}")
     print(f"Text tokenizer path: {TOKENIZER_TEXT_PATH}")
diff --git a/text2image.py b/text2image.py
index 53f3bcb..918ccf8 100644
--- a/text2image.py
+++ b/text2image.py
@@ -20,13 +20,17 @@ def main(args: argparse.Namespace):
     print(f"Batch size: {args.batch_size}")
 
     # Load Chameleon model
-    model = ChameleonInferenceModel(
+    unquantized_model = ChameleonInferenceModel(
         MODEL_7B_PATH.as_posix(),
         TOKENIZER_TEXT_PATH.as_posix(),
         TOKENIZER_IMAGE_CFG_PATH.as_posix(),
         TOKENIZER_IMAGE_PATH.as_posix(),
     )
-
+    model = torch.quantization.quantize_dynamic(
+        unquantized_model,  # The model to be quantized
+        {torch.nn.Linear, torch.nn.LSTM},  # Layers to be dynamically quantized
+        dtype=torch.qint8  # Data type for quantization
+    )
     # Generate options
     options = Options()
     options.txt = False
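
Note (not part of the patch): torch.quantization.quantize_dynamic swaps eligible
submodules of an nn.Module for int8-weight equivalents, so the call above only has
an effect if ChameleonInferenceModel is (or wraps) an nn.Module containing Linear
or LSTM layers, and it assumes torch is imported in both files. Below is a minimal,
self-contained sketch of the same call; TinyModel is a hypothetical stand-in for
the Chameleon model, used here only for illustration.

import torch
import torch.nn as nn

class TinyModel(nn.Module):
    """Hypothetical toy model; stands in for the real inference model."""
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(16, 4)  # Linear layers are eligible for dynamic quantization

    def forward(self, x):
        return self.fc(x)

fp32_model = TinyModel().eval()

# Replace eligible layer types with dynamically quantized (int8-weight) versions.
int8_model = torch.quantization.quantize_dynamic(
    fp32_model,
    {torch.nn.Linear, torch.nn.LSTM},  # layer types to quantize
    dtype=torch.qint8,                 # weights stored as int8
)

x = torch.randn(1, 16)
print(int8_model(x).shape)  # forward pass unchanged; weights are int8 internally

Dynamic quantization stores weights as int8 and dequantizes them on the fly inside
matrix multiplies, so it reduces model memory and can speed up CPU inference without
requiring a calibration pass.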