Skip to content

Commit

Permalink
Docs (gpfxq): updating docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
i-colbert authored and Giuseppe5 committed Feb 15, 2024
1 parent 35069dc commit 0570e88
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/brevitas/graph/gpfq.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ class gpfq_mode(gpxq_mode):
Args:
model (Module): The model to quantize with GPFQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPFQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gpfq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPFQ. Default: False
p (float): The percentage of processed inputs to use. Default: 1.0
Expand Down
8 changes: 8 additions & 0 deletions src/brevitas/graph/gptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@ class gptq_mode(gpxq_mode):
Args:
model (Module): The model to quantize with GPTQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPTQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gptq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPTQ. Default: False
num_blocks (int): The number of sub-blocks to use to speed-up GPTQ computation. Default: 100
act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
forward call inside the context manager returns None. Default: False
Example:
>>> with torch.no_grad():
Expand Down
26 changes: 26 additions & 0 deletions src/brevitas/graph/gpxq.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,32 @@ class LayerHandler:


class gpxq_mode(ABC):
"""
Apply GPxQ algorithm.
Args:
model (Module): The model to quantize with GPxQ
group_of_parallel_layers (Optional, List[str]): List of lists where each inner list is a group
of layer names that can be optimized in parallel. Default: None
inplace (bool): Whether to apply GPxQ inplace or perform a deepcopy. Default: True
create_weight_orig (bool): If True, store the original floating point weights before applying
gpxq. These weights will be used anytime quantization is disabled. Default: True
use_quant_activations (bool): Whether to leave quantized activations enabled while performing
GPxQ. Default: False
act_order (bool): Whether to order greedy path following by Hessian approximation. Default: False
return_forward_output (bool): If True, returns the output of the forward pass. Otherwise the
forward call inside the context manager returns None. Default: False
Example:
>>> with torch.no_grad():
>>> with gpxq_mode(model) as gpxq:
>>> gpxq_mode = gpxq.model
>>> for i in tqdm(range(gpxq.num_layers)):
>>> for img, t in calib_loader:
>>> img = img.cuda()
>>> gpxq_mode(img)
>>> gpxq.update()
"""

def __init__(
self,
Expand Down

0 comments on commit 0570e88

Please sign in to comment.