diff --git a/src/brevitas/graph/gpfq.py b/src/brevitas/graph/gpfq.py
index b08a9701c..e255660a0 100644
--- a/src/brevitas/graph/gpfq.py
+++ b/src/brevitas/graph/gpfq.py
@@ -23,7 +23,11 @@ class gpfq_mode(gpxq_mode):
     Args:
         model (Module): The model to quantize with GPFQ
+        group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
+            of layer names that can be optimized in parallel. Default: None
         inplace (bool): Wheter to apply GPFQ inplace or perform a deepcopy. Default: True
+        create_weight_orig (bool): If True, store the original floating point weights before applying
+            GPFQ. These weights will be used anytime quantization is disabled. Default: True
         use_quant_activations (bool): Wheter to leave quantize activations enabled while performing
             GPFQ. Default: False
         p (float): The percentage of processed inputs to use. Default: 1.0
diff --git a/src/brevitas/graph/gptq.py b/src/brevitas/graph/gptq.py
index 907633e0f..28cb12cd6 100644
--- a/src/brevitas/graph/gptq.py
+++ b/src/brevitas/graph/gptq.py
@@ -27,9 +27,17 @@ class gptq_mode(gpxq_mode):
     Args:
         model (Module): The model to quantize with GPTQ
+        group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
+            of layer names that can be optimized in parallel. Default: None
         inplace (bool): Wheter to apply GPTQ inplace or perform a deepcopy. Default: True
+        create_weight_orig (bool): If True, store the original floating point weights before applying
+            GPTQ. These weights will be used anytime quantization is disabled. Default: True
         use_quant_activations (bool): Wheter to leave quantize activations enabled while performing
             GPTQ. Default: False
+        num_blocks (int): The number of sub-blocks to use to speed up GPTQ computation. Default: 100
+        act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
+        return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
+            forward call inside the context manager returns None. Default: False
 
     Example:
         >>> with torch.no_grad():
diff --git a/src/brevitas/graph/gpxq.py b/src/brevitas/graph/gpxq.py
index 6f570337c..149e8ec03 100644
--- a/src/brevitas/graph/gpxq.py
+++ b/src/brevitas/graph/gpxq.py
@@ -34,6 +34,32 @@ class LayerHandler:
 
 
 class gpxq_mode(ABC):
+    """
+    Apply the GPxQ algorithm.
+
+    Args:
+        model (Module): The model to quantize with GPxQ
+        group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
+            of layer names that can be optimized in parallel. Default: None
+        inplace (bool): Whether to apply GPxQ inplace or perform a deepcopy. Default: True
+        create_weight_orig (bool): If True, store the original floating point weights before applying
+            GPxQ. These weights will be used anytime quantization is disabled. Default: True
+        use_quant_activations (bool): Whether to leave quantized activations enabled while performing
+            GPxQ. Default: False
+        act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
+        return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
+            forward call inside the context manager returns None. Default: False
+
+    Example:
+        >>> with torch.no_grad():
+        >>>     with gpxq_mode(model) as gpxq:
+        >>>         gpxq_model = gpxq.model
+        >>>         for i in tqdm(range(gpxq.num_layers)):
+        >>>             for img, t in calib_loader:
+        >>>                 img = img.cuda()
+        >>>                 gpxq_model(img)
+        >>>             gpxq.update()
+    """
 
     def __init__(
         self,
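For reference, a minimal calibration loop exercising the newly documented `gptq_mode` arguments might look like the sketch below, following the docstring example in `gpxq.py`. Here `model` and `calib_loader` are placeholders standing in for a Brevitas-quantized model and a calibration `DataLoader`; they are assumptions, not part of this patch.

```python
import torch
from tqdm import tqdm

from brevitas.graph.gptq import gptq_mode

# `model` and `calib_loader` are hypothetical user-supplied objects:
# a quantized model and an iterable of (image, target) calibration batches.
with torch.no_grad():
    # act_order and return_forward_output are among the kwargs documented above.
    with gptq_mode(model, act_order=True, return_forward_output=False) as gptq:
        gptq_model = gptq.model
        # One outer pass per layer, feeding the full calibration set each time.
        for _ in tqdm(range(gptq.num_layers)):
            for img, _ in calib_loader:
                img = img.cuda()
                gptq_model(img)
            # Quantize the current layer's weights, then advance to the next.
            gptq.update()
```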