Skip to content

Commit

Permalink
Docs (gpfxq): updating docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
i-colbert authored and Giuseppe5 committed Feb 15, 2024
1 parent 35069dc commit 0570e88
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/brevitas/graph/gpfq.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ class gpfq_mode(gpxq_mode):
Args:
model (Module): The model to quantize with GPFQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPFQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gpfq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPFQ. Default: False
p (float): The percentage of processed inputs to use. Default: 1.0
Expand Down
8 changes: 8 additions & 0 deletions src/brevitas/graph/gptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ class gptq_mode(gpxq_mode):
Args:
model (Module): The model to quantize with GPTQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPTQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gptq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPTQ. Default: False
num_blocks (int): The number of sub-blocks to use to speed-up GPTQ computation. Default: 100
act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
forward call inside the context manager returns None. Default: False
Example:
>>> with torch.no_grad():
Expand Down
26 changes: 26 additions & 0 deletions src/brevitas/graph/gpxq.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,32 @@ class LayerHandler:


class gpxq_mode(ABC):
"""
Apply GPxQ algorithm.
Args:
model (Module): The model to quantize with GPxQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPxQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gpxq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPxQ. Default: False
act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
forward call inside the context manager returns None. Default: False
Example:
>>> with torch.no_grad():
>>> with gpxq_mode(model) as gpxq:
>>> gpxq_mode = gpxq.model
>>> for i in tqdm(range(gpxq.num_layers)):
>>> for img, t in calib_loader:
>>> img = img.cuda()
>>> gpxq_mode(img)
>>> gpxq.update()
"""

def __init__(
self,
Expand Down

0 comments on commit 0570e88

Please sign in to comment.