Code for the NeurIPS 2023 paper: "ZipLM: Inference-Aware Structured Pruning of Language Models".
@misc{kurtic2023sparse,
  author        = {Kurtic, Eldar and Kuznedelev, Denis and Frantar, Elias and Goin, Michael and Alistarh, Dan},
  title         = {Sparse Finetuning for Inference Acceleration of Large Language Models},
  year          = {2023},
  eprint        = {2310.06927},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.06927},
}