From 653d92a880488007f45f6c2b0e07d2dd199a5883 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Fri, 26 Apr 2024 13:26:53 -0500 Subject: [PATCH] ci: add markdown linting and fix lint errors Signed-off-by: Nathan Weinberg --- .github/workflows/docs.yml | 23 +++++++++++++++++++++++ .markdownlint-cli2.yaml | 13 +++++++++++++ Makefile | 22 ++++++++++++++++++++++ SECURITY.md | 6 +++--- docs/github-merge-strategy.md | 2 +- docs/github-taxonomy-automation.md | 10 +++++----- docs/huggingface-publish-strategy.md | 6 ++++++ 7 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/docs.yml create mode 100644 .markdownlint-cli2.yaml create mode 100644 Makefile diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..ecb12dd --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,23 @@ +name: docs + +on: + push: + branches: ["main"] + paths: + - '**/*.md' + - '.markdownlint-cli2.yaml' + pull_request: + branches: ["main"] + paths: + - '**/*.md' + - '.markdownlint-cli2.yaml' + +jobs: + markdown-lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: markdownlint-cli2-action + uses: DavidAnson/markdownlint-cli2-action@v15 + with: + globs: '**/*.md' diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml new file mode 100644 index 0000000..1104379 --- /dev/null +++ b/.markdownlint-cli2.yaml @@ -0,0 +1,13 @@ +config: + line-length: false + no-emphasis-as-header: false + first-line-heading: false + code-block-style: false + no-duplicate-header: false + single-trailing-newline: false +globs: + - "**/*.md" +ignores: + - ".tox/**" + - "venv/**" + - ".venv/**" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..82a3aa8 --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +.PHONY: help +help: + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf 
"\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +# +# If you want to see the full commands, run: +# NOISY_BUILD=y make +# +ifeq ($(NOISY_BUILD),) + ECHO_PREFIX=@ + CMD_PREFIX=@ + PIPE_DEV_NULL=> /dev/null 2> /dev/null +else + ECHO_PREFIX=@\# + CMD_PREFIX= + PIPE_DEV_NULL= +endif + +.PHONY: md-lint +md-lint: ## Lint markdown files + $(ECHO_PREFIX) printf " %-12s ./...\n" "[MD LINT]" + $(CMD_PREFIX) podman run --rm -v $(CURDIR):/workdir --security-opt label=disable docker.io/davidanson/markdownlint-cli2:v0.12.1 > /dev/null diff --git a/SECURITY.md b/SECURITY.md index 51f3987..7a78a43 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,15 +2,15 @@ The InstructLab team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions. -# Reporting a Vulnerability +## Reporting a Vulnerability -If you think you've identified a security issue in an InstructLab project repository, please DO NOT report the issue publicly via the GitHub issue tracker, Slack Workspace, etc. +If you think you've identified a security issue in an InstructLab project repository, please DO NOT report the issue publicly via the GitHub issue tracker, Slack Workspace, etc. Instead, send an email with as many details as possible to [instructlab-sec@osci.io](mailto:instructlab-sec@osci.io). This is a private mailing list for the core maintainers. Please do not create a public issue. -# Security Vulnerability Response +## Security Vulnerability Response Each report is acknowledged and analyzed by the core maintainers within 3 working days. 
diff --git a/docs/github-merge-strategy.md b/docs/github-merge-strategy.md index 09b0883..1ebc690 100644 --- a/docs/github-merge-strategy.md +++ b/docs/github-merge-strategy.md @@ -9,6 +9,7 @@ Every Pull Request that is made to an InstructLab repository should meet the bel ### CI checks We should require that all CI checks pass on a Pull Request before it can be considered for merge. Every repository should have at minimum the following checks: + - Linting - Testing (Unit, Functional, etc) - DCO Commit Signoff via a `Signed-off-by` header. There is a DCO check enabled for all repositories in this GitHub organization. @@ -30,4 +31,3 @@ We use the default merge method of creating merge commits for PRs. This is to en This requires project maintainers to include commit messages and the overall structure of the commit series as part of their review. When multiple commits are present, they should represent a logical series of changes that implement the overall change proposed in the PR. The commit message for each should clearly explain that step of the progression. It is common that a PR author may need to do a final rebase to clean up their proposed commit series before a PR can be merged. It is also fine for a project maintainer to perform this step when the changes necessary are straightforward enough to do so. This includes doing a final rebase on `main` if necessary. The PR itself should NOT include any merge commits of `main` back into the developer's branch. We expect the proposed commit series to be a clean set of commits against `main` without conflicts or merge commit history. We only use a merge commit to record the PR's inclusion into `main`. 
- diff --git a/docs/github-taxonomy-automation.md b/docs/github-taxonomy-automation.md index 04e0623..de9ade5 100644 --- a/docs/github-taxonomy-automation.md +++ b/docs/github-taxonomy-automation.md @@ -41,16 +41,16 @@ The bot should allow configuring a list of GitHub teams that are allowed to enable its functionality on a PR. The first command will only be accessible to the following teams: -- Taxonomy Triagers -- Taxonomy Maintainers -- Backend Maintainers -- Instruct Lab Org Admins +* Taxonomy Triagers +* Taxonomy Maintainers +* Backend Maintainers +* Instruct Lab Org Admins This first command is used to indicate that a PR has been reviewed enough to determine that it is safe to run automated tasks against it. This is to help avoid abuse of our backend resources with malicious PRs. -``` +```text @instruct-lab-bot enable ``` diff --git a/docs/huggingface-publish-strategy.md b/docs/huggingface-publish-strategy.md index 513bc04..99ad1f4 100644 --- a/docs/huggingface-publish-strategy.md +++ b/docs/huggingface-publish-strategy.md @@ -3,24 +3,29 @@ This document describes the publishing strategy used for all models in the [InstructLab](https://huggingface.co/instructlab) organization. ## What are we publishing and why? + The InstructLab team will be periodically training the full unquantized model with new Pull Requests to the [taxonomy](https://github.com/instructlab/taxonomy) repository. When the evaluation shows that the model has improved, the team will be publishing an unquantized model and a 4-bit quantized GGUF form model to a platform called HuggingFace. ## What is HuggingFace? + [HuggingFace](https://huggingface.co/) is a centralized web service platform, similar to GitHub, for hosting Git-based repositories related to data science and machine learning. In the context of InstructLab, HuggingFace is the platform where we will be publishing releases of our model for consumption by the community. 
We will be publishing two different kinds of model families - Merlinite and Granite. ## Merlinite + The Merlinite model family is based off the [Mistral](https://mistral.ai/) model family and uses the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment. You can read more about it [here](https://huggingface.co/instructlab/merlinite-7b-lab). The InstructLab organization will be publishing a community version of the Merlinite 7B size model, in both unquantized and 4-bit quantized GGUF form. ## Granite + The Granite model family is the [foundational model family](https://www.ibm.com/downloads/cas/X9W4O6BM) for the IBM watsonx AI platform, designed for usage in a business environment. You can read more about it [here](https://huggingface.co/instructlab/granite-7b-lab). The InstructLab organization will be publishing a community version of the Granite 7B size model using the [Large-scale Alignment for chatBots (LAB)](https://arxiv.org/abs/2403.01081) alignment, in both unquantized and 4-bit quantized GGUF form. ## Naming Scheme + The naming scheme for both Merlinite and Granite will follow this generic scheme: ` - - - - ` @@ -35,4 +40,5 @@ The specific schemes that will be published are detailed below: | [`granite-7b-lab-GGUF`](https://huggingface.co/instructlab/granite-7b-lab-GGUF) | `release-yyyymmdd` | Where the full precision and quantized Granite GGUFs live | ## Retention Policy + The InstructLab team will maintain the most recent **10** published versions of the respective models.