From db8375fff50bdc004f3a504ba527af1a689662d5 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 26 Apr 2024 17:13:17 -0500 Subject: [PATCH 1/2] ci: add spellchecker Signed-off-by: Nathan Weinberg --- .github/workflows/spellcheck.yml | 22 +++++++ .gitignore | 3 + .spellcheck-en-custom.txt | 105 +++++++++++++++++++++++++++++++ Makefile | 4 ++ 4 files changed, 134 insertions(+) create mode 100644 .github/workflows/spellcheck.yml create mode 100644 .spellcheck-en-custom.txt diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml new file mode 100644 index 0000000..3567038 --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: Spellcheck + +on: + pull_request: + branches: [main] + paths: + - '**.md' + +permissions: + contents: read + +jobs: + spellcheck: + name: Spellcheck (en_US) + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Spellcheck + uses: rojopolis/spellcheck-github-actions@0.35.0 diff --git a/.gitignore b/.gitignore index e43b0f9..d1d3c59 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ .DS_Store + +# Spelling +dictionary.dic diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt new file mode 100644 index 0000000..c561250 --- /dev/null +++ b/.spellcheck-en-custom.txt @@ -0,0 +1,105 @@ +# make spellcheck-sort +# Please keep this file sorted: +Abhishek +Akash +AMDGPU +arge +arXiv +backend +backends +Bhandwaldar +CLI +cli +Colab +compositional +Conda +Containerfile +cpp +cuBLAS +CUDA +dataset +dev +ditaa +dr +Dropdown +env +Eval +Finetuning +GFX +GGUF +GiB +Gmail +gpu +hipBLAS +ilab +impactful +Inferencing +instructlab +ISA +JIT +Jupyter +KAGGLE +Kaggle +Kaggle's +Kai +Kubernetes +lignment +LLM +llms +LLVM +lora +Merlinite +Miniforge +Mixtral +MLX +mlx +NVidia +Nvidia +orchestrator +ots +Pareja +PEFT +PlantUML +Podman +pre +preprint +pyenv +PyPI +PyTorch +qlora +quantized +Quantizing +Radeon +RDNA 
+README +repo +ROCm +RTX +RX +Salawu +SDG +sexualized +SHA +Shivchander +Srivastava +subdirectory +Sudalairaj +Taj +tatsu +TBD +Tesla +th +th +tl +tox +unquantized +USM +venv +wikisql +WSL +xcode +XNACK +XT +XTX +Xu +YAML diff --git a/Makefile b/Makefile index 82a3aa8..c6a369e 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,10 @@ help: @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) +.PHONY: spellcheck-sort +spellcheck-sort: .spellcheck-en-custom.txt + sort -d -f -o $< $< + # # If you want to see the full commands, run: # NOISY_BUILD=y make From aa3e7c21302596cad92fd779dd09889c9c483f8f Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Sun, 28 Apr 2024 23:34:41 -0500 Subject: [PATCH 2/2] Add ability to run spellcheck locally Signed-off-by: Nathan Weinberg --- .spellcheck-en-custom.txt | 17 ++++++++++++++++ .spellcheck.yml | 29 ++++++++++++++++++++++++++++ Makefile | 4 ++++ docs/huggingface-publish-strategy.md | 4 ++-- 4 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 .spellcheck.yml diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt index c561250..80a49be 100644 --- a/.spellcheck-en-custom.txt +++ b/.spellcheck-en-custom.txt @@ -7,6 +7,7 @@ arge arXiv backend backends +benchmarking Bhandwaldar CLI cli @@ -18,15 +19,18 @@ cpp cuBLAS CUDA dataset +DCO dev ditaa dr Dropdown env Eval +Excalidraw Finetuning GFX GGUF +GGUFs GiB Gmail gpu @@ -48,7 +52,10 @@ LLM llms LLVM lora +md +Mergify Merlinite +mimimum Miniforge Mixtral MLX @@ -60,9 +67,11 @@ ots Pareja PEFT PlantUML +PNG Podman pre preprint +PR's pyenv PyPI PyTorch @@ -72,15 +81,18 @@ Quantizing Radeon RDNA README +rebase repo ROCm RTX RX +safetensors Salawu SDG sexualized SHA Shivchander +Signoff Srivastava subdirectory Sudalairaj @@ -92,9 +104,14 @@ th th tl tox +triager +Triagers +triager's +triagers
+unquantized USM venv +watsonx wikisql WSL xcode diff --git a/.spellcheck.yml b/.spellcheck.yml new file mode 100644 index 0000000..7d04b2f --- /dev/null +++ b/.spellcheck.yml @@ -0,0 +1,29 @@ + +# SPDX-License-Identifier: Apache-2.0 + +matrix: +- name: markdown + aspell: + lang: en + d: en_US + camel-case: true + mode: markdown + sources: + - "**/*.md|!REVIEWERS.md|!build/**|!.tox/**" + dictionary: + wordlists: + - .spellcheck-en-custom.txt + pipeline: + - pyspelling.filters.context: + context_visible_first: true + escapes: '\\[\\`~]' + delimiters: + # Ignore multiline content between fences (fences can have 3 or more back ticks) + # ```language + # content + # ``` + - open: '(?s)^(?P<open> *`{3,}).*?$' + close: '^(?P=open)$' + # Ignore text between inline back ticks + - open: '(?P<open>`+)' + close: '(?P=open)' diff --git a/Makefile b/Makefile index c6a369e..3d7ee87 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,10 @@ help: @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) +.PHONY: spellcheck +spellcheck: .spellcheck.yml + pyspelling --config $< + .PHONY: spellcheck-sort spellcheck-sort: .spellcheck-en-custom.txt sort -d -f -o $< $< diff --git a/docs/huggingface-publish-strategy.md b/docs/huggingface-publish-strategy.md index 99ad1f4..60762cd 100644 --- a/docs/huggingface-publish-strategy.md +++ b/docs/huggingface-publish-strategy.md @@ -16,13 +16,13 @@ We will be publishing two different kinds of model families - Merlinite and Gran The Merlinite model family is based off the [Mistral](https://mistral.ai/) model family and uses the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment. You can read more about it [here](https://huggingface.co/instructlab/merlinite-7b-lab).
-The InstructLab organzation will be publishing a community version of the Merlinite 7B size model, in both unquantized and 4-bit quantized GGUF form. +The InstructLab organization will be publishing a community version of the Merlinite 7B size model, in both unquantized and 4-bit quantized GGUF form. ## Granite The Granite model family is the [foundational model family](https://www.ibm.com/downloads/cas/X9W4O6BM) for the IBM watsonx AI platform, designed for usage in a business environment. You can read more about it [here](https://huggingface.co/instructlab/granite-7b-lab). -The InstructLab organzation will be publishing a community version of the Granite 7B size model using the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment, in both unquantized and 4-bit quantized GGUF form. +The InstructLab organization will be publishing a community version of the Granite 7B size model using the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment, in both unquantized and 4-bit quantized GGUF form. ## Naming Scheme