From db8375fff50bdc004f3a504ba527af1a689662d5 Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <nweinber@redhat.com>
Date: Fri, 26 Apr 2024 17:13:17 -0500
Subject: [PATCH 1/2] ci: add spellchecker

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
---
 .github/workflows/spellcheck.yml |  22 +++++++
 .gitignore                       |   3 +
 .spellcheck-en-custom.txt        | 105 +++++++++++++++++++++++++++++++
 Makefile                         |   4 ++
 4 files changed, 134 insertions(+)
 create mode 100644 .github/workflows/spellcheck.yml
 create mode 100644 .spellcheck-en-custom.txt
diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 0000000..3567038
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: Apache-2.0
+# Runs the rojopolis spellcheck action on pull requests to main that touch Markdown files.
+name: Spellcheck
+
+on:
+  pull_request:
+    branches: [main]  # only pull requests targeting main
+    paths:
+      - '**.md'  # trigger only when at least one changed file is Markdown
+
+permissions:
+  contents: read  # restrict the workflow token to read-only access to repository contents
+
+jobs:
+  spellcheck:
+    name: Spellcheck (en_US)
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Spellcheck
+        uses: rojopolis/spellcheck-github-actions@0.35.0  # no `with:` inputs are passed, so the action's defaults apply
diff --git a/.gitignore b/.gitignore
index e43b0f9..d1d3c59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
 .DS_Store
+
+# Spelling
+dictionary.dic
diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt
new file mode 100644
index 0000000..c561250
--- /dev/null
+++ b/.spellcheck-en-custom.txt
@@ -0,0 +1,105 @@
+# make spellcheck-sort
+# Please keep this file sorted:
+Abhishek
+Akash
+AMDGPU
+arge
+arXiv
+backend
+backends
+Bhandwaldar
+CLI
+cli
+Colab
+compositional
+Conda
+Containerfile
+cpp
+cuBLAS
+CUDA
+dataset
+dev
+ditaa
+dr
+Dropdown
+env
+Eval
+Finetuning
+GFX
+GGUF
+GiB
+Gmail
+gpu
+hipBLAS
+ilab
+impactful
+Inferencing
+instructlab
+ISA
+JIT
+Jupyter
+KAGGLE
+Kaggle
+Kaggle's
+Kai
+Kubernetes
+lignment
+LLM
+llms
+LLVM
+lora
+Merlinite
+Miniforge
+Mixtral
+MLX
+mlx
+NVidia
+Nvidia
+orchestrator
+ots
+Pareja
+PEFT
+PlantUML
+Podman
+pre
+preprint
+pyenv
+PyPI
+PyTorch
+qlora
+quantized
+Quantizing
+Radeon
+RDNA
+README
+repo
+ROCm
+RTX
+RX
+Salawu
+SDG
+sexualized
+SHA
+Shivchander
+Srivastava
+subdirectory
+Sudalairaj
+Taj
+tatsu
+TBD
+Tesla
+th
+th
+tl
+tox
+unquantized
+USM
+venv
+wikisql
+WSL
+xcode
+XNACK
+XT
+XTX
+Xu
+YAML
diff --git a/Makefile b/Makefile
index 82a3aa8..c6a369e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,10 @@
 help:
 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
 
+.PHONY: spellcheck-sort
+spellcheck-sort: .spellcheck-en-custom.txt
+	sort -d -f -o $< $<
+
 #
 # If you want to see the full commands, run:
 #   NOISY_BUILD=y make

From aa3e7c21302596cad92fd779dd09889c9c483f8f Mon Sep 17 00:00:00 2001
From: Nathan Weinberg <nweinber@redhat.com>
Date: Sun, 28 Apr 2024 23:34:41 -0500
Subject: [PATCH 2/2] Add ability to run spellcheck locally

Signed-off-by: Nathan Weinberg <nweinber@redhat.com>
---
 .spellcheck-en-custom.txt            | 17 ++++++++++++++++
 .spellcheck.yml                      | 29 ++++++++++++++++++++++++++++
 Makefile                             |  4 ++++
 docs/huggingface-publish-strategy.md |  4 ++--
 4 files changed, 52 insertions(+), 2 deletions(-)
 create mode 100644 .spellcheck.yml

diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt
index c561250..80a49be 100644
--- a/.spellcheck-en-custom.txt
+++ b/.spellcheck-en-custom.txt
@@ -7,6 +7,7 @@ arge
 arXiv
 backend
 backends
+benchmarking
 Bhandwaldar
 CLI
 cli
@@ -18,15 +19,18 @@ cpp
 cuBLAS
 CUDA
 dataset
+DCO
 dev
 ditaa
 dr
 Dropdown
 env
 Eval
+Excalidraw
 Finetuning
 GFX
 GGUF
+GGUFs
 GiB
 Gmail
 gpu
@@ -48,7 +52,10 @@ LLM
 llms
 LLVM
 lora
+md
+Mergify
 Merlinite
+mimimum
 Miniforge
 Mixtral
 MLX
@@ -60,9 +67,11 @@ ots
 Pareja
 PEFT
 PlantUML
+PNG
 Podman
 pre
 preprint
+PR's
 pyenv
 PyPI
 PyTorch
@@ -72,15 +81,18 @@ Quantizing
 Radeon
 RDNA
 README
+rebase
 repo
 ROCm
 RTX
 RX
+safetensors
 Salawu
 SDG
 sexualized
 SHA
 Shivchander
+Signoff
 Srivastava
 subdirectory
 Sudalairaj
@@ -92,9 +104,14 @@ th
 th
 tl
 tox
+triager
+Triagers
+triager's
+triagers
 unquantized
 USM
 venv
+watsonx
 wikisql
 WSL
 xcode
diff --git a/.spellcheck.yml b/.spellcheck.yml
new file mode 100644
index 0000000..7d04b2f
--- /dev/null
+++ b/.spellcheck.yml
@@ -0,0 +1,29 @@
+
+# SPDX-License-Identifier: Apache-2.0
+# PySpelling configuration; `make spellcheck` runs `pyspelling --config` on this file.
+matrix:
+- name: markdown
+  aspell:
+    lang: en
+    d: en_US
+    camel-case: true
+    mode: markdown
+  sources:  # every Markdown file, minus REVIEWERS.md and anything under build/ or .tox/
+  - "**/*.md|!REVIEWERS.md|!build/**|!.tox/**"
+  dictionary:
+    wordlists:
+    - .spellcheck-en-custom.txt  # project word list, kept sorted via `make spellcheck-sort`
+  pipeline:
+  - pyspelling.filters.context:
+      context_visible_first: true  # text is checked by default; spans matched by the delimiters below are skipped
+      escapes: '\\[\\`~]'  # a backslash followed by a backslash, back tick or tilde is treated as an escape
+      delimiters:
+      # Skip fenced code blocks: an optionally indented fence of 3+ back ticks (with an
+      # ```language
+      # content
+      # ```  optional language tag) up to a line consisting of that same fence
+      - open: '(?s)^(?P<open> *`{3,}).*?$'
+        close: '^(?P=open)$'
+      # Skip inline code: a run of back ticks up to the next run of the same length
+      - open: '(?P<open>`+)'
+        close: '(?P=open)'
diff --git a/Makefile b/Makefile
index c6a369e..3d7ee87 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,10 @@
 help:
 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
 
+.PHONY: spellcheck
+spellcheck: .spellcheck.yml ## Spellcheck Markdown files with pyspelling
+	pyspelling --config $<
+
 .PHONY: spellcheck-sort
 spellcheck-sort: .spellcheck-en-custom.txt
 	sort -d -f -o $< $<
diff --git a/docs/huggingface-publish-strategy.md b/docs/huggingface-publish-strategy.md
index 99ad1f4..60762cd 100644
--- a/docs/huggingface-publish-strategy.md
+++ b/docs/huggingface-publish-strategy.md
@@ -16,13 +16,13 @@ We will be publishing two different kinds of model families - Merlinite and Gran
 
 The Merlinite model family is based off the [Mistral](https://mistral.ai/) model family and uses the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment. You can read more about it [here](https://huggingface.co/instructlab/merlinite-7b-lab).
 
-The InstructLab organzation will be publishing a community version of the Merlinite 7B size model, in both unquantized and 4-bit quantized GGUF form.
+The InstructLab organization will be publishing a community version of the Merlinite 7B size model, in both unquantized and 4-bit quantized GGUF form.
 
 ## Granite
 
 The Granite model family is the [foundational model family](https://www.ibm.com/downloads/cas/X9W4O6BM) for the IBM watsonx AI platform, designed for usage in a business environment. You can read more about it [here](https://huggingface.co/instructlab/granite-7b-lab).
 
-The InstructLab organzation will be publishing a community version of the Granite 7B size model using the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment, in both unquantized and 4-bit quantized GGUF form.
+The InstructLab organization will be publishing a community version of the Granite 7B size model using the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment, in both unquantized and 4-bit quantized GGUF form.
 
 ## Naming Scheme