diff --git a/.github/checksum_checker.sh b/.github/checksum_checker.sh
new file mode 100644
--- /dev/null
+++ b/.github/checksum_checker.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+set -euxo pipefail
+# Recompute the sha256 of every file listed in a gallery index YAML and
+# rewrite the stored checksum in place when it no longer matches.
+#
+# Usage: checksum_checker.sh INDEX_YAML
+#
+# Needs yq (v4) and the huggingface_hub and requests Python packages:
+#   pip install huggingface_hub requests
+
+# Path to the input YAML file
+input_yaml=${1:?usage: checksum_checker.sh INDEX_YAML}
+
+#######################################
+# Download one gallery file, hash it and fix its checksum in the YAML.
+# Globals:   input_yaml (read; the file it names is edited in place)
+# Arguments: $1 - model name (accepted for callers, not used here)
+#            $2 - file name as listed in the YAML
+#            $3 - download URI (huggingface://, huggingface.co or other URL)
+#            $4 - checksum currently stored in the YAML
+#            $5 - index of the model entry in the YAML
+# Outputs:   one status line on stdout
+# Returns:   non-zero when the download or the hashing fails
+#######################################
+check_and_update_checksum() {
+  local file_name=$2 uri=$3 old_checksum=$4 idx=$5
+  local new_checksum
+
+  # The values reach Python through argv and the program is a quoted
+  # here-document, so quotes in a URI or file name cannot alter the code.
+  new_checksum=$(
+    python3 - "$uri" "$file_name" <<'PYTHON'
+import hashlib
+import sys
+import tempfile
+
+import requests
+from huggingface_hub import hf_hub_download
+
+CHUNK_SIZE = 1 << 20
+uri, file_name = sys.argv[1], sys.argv[2]
+
+
+def parse_uri(uri):
+    """Return ('huggingface', repo_id) or ('direct', url) for a gallery URI."""
+    if uri.startswith('huggingface://'):
+        # huggingface://REPO_ID/FILE: drop the scheme and the file part.
+        return 'huggingface', uri.split('://')[1].rsplit('/', 1)[0]
+    if 'huggingface.co' in uri:
+        # https://huggingface.co/REPO_ID/resolve/REVISION/FILE
+        parts = uri.split('/resolve/')
+        if len(parts) > 1:
+            return 'huggingface', parts[0].split('https://huggingface.co/')[-1]
+    return 'direct', uri
+
+
+def sha256_of(chunks):
+    """Hex sha256 digest of an iterable of byte chunks."""
+    digest = hashlib.sha256()
+    for chunk in chunks:
+        digest.update(chunk)
+    return digest.hexdigest()
+
+
+def file_chunks(path):
+    """Yield the content of a file in CHUNK_SIZE pieces."""
+    with open(path, 'rb') as f:
+        yield from iter(lambda: f.read(CHUNK_SIZE), b'')
+
+
+download_type, target = parse_uri(uri)
+
+if download_type == 'huggingface':
+    # A throwaway cache directory gets the blob deleted together with the
+    # snapshot link once hashing is done.
+    with tempfile.TemporaryDirectory() as cache_dir:
+        path = hf_hub_download(repo_id=target, filename=file_name,
+                               cache_dir=cache_dir, token=False)
+        checksum = sha256_of(file_chunks(path))
+else:
+    # Hash the response while it streams: nothing is written to disk and
+    # the whole file is never held in memory.
+    with requests.get(target, stream=True, timeout=60) as response:
+        if response.status_code != 200:
+            print(f'Error downloading file: {response.status_code}', file=sys.stderr)
+            sys.exit(1)
+        checksum = sha256_of(response.iter_content(chunk_size=CHUNK_SIZE))
+
+print(checksum)
+PYTHON
+  )
+
+  # Never write anything but a real sha256 digest into the YAML.
+  if [[ ! "$new_checksum" =~ ^[0-9a-f]{64}$ ]]; then
+    echo "Unexpected checksum output for $file_name: $new_checksum" >&2
+    return 1
+  fi
+
+  # Compare and update the YAML file if checksums do not match
+  if [[ "$old_checksum" != "$new_checksum" ]]; then
+    echo "Checksum mismatch for $file_name. Updating..."
+    # strenv() makes yq read the strings from the environment instead of
+    # having them spliced into the expression text.
+    FILE_NAME="$file_name" yq eval -i \
+      "del(.[$idx].files[] | select(.filename == strenv(FILE_NAME)).sha256)" \
+      "$input_yaml"
+    FILE_NAME="$file_name" NEW_CHECKSUM="$new_checksum" yq eval -i \
+      "(.[$idx].files[] | select(.filename == strenv(FILE_NAME))).sha256 = strenv(NEW_CHECKSUM)" \
+      "$input_yaml"
+  else
+    echo "Checksum match for $file_name. No update needed."
+  fi
+}
+
+# Read the YAML and process each file
+len=$(yq eval '. | length' "$input_yaml")
+for ((i = 0; i < len; i++)); do
+  name=$(yq eval ".[$i].name" "$input_yaml")
+  files_len=$(yq eval ".[$i].files | length" "$input_yaml")
+  for ((j = 0; j < files_len; j++)); do
+    filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
+    uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
+    checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
+    echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
+    check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
+  done
+done
diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/checksum_checker.yaml
@@ -0,0 +1,31 @@
+name: Check if checksums are up-to-date
+on:
+  schedule:
+    - cron: 0 20 * * *
+  workflow_dispatch:
+jobs:
+  checksum_check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install dependencies
+        run: |
+          pip install huggingface_hub requests
+          # yq: the variables must be assigned before the line that expands them
+          VERSION=v4.43.1
+          BINARY=yq_linux_amd64
+          wget "https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY}.tar.gz" -O - |
+            tar xz && sudo mv "${BINARY}" /usr/bin/yq
+      - name: Checksum checker 🔧
+        run: |
+          bash .github/checksum_checker.sh gallery/index.yaml
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.UPDATE_BOT_TOKEN }}
+          push-to-fork: ci-forks/LocalAI
+          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
+          title: 'models(gallery): :arrow_up: update checksum'
+          branch: "update/checksum"
+          body: Updating checksums in gallery/index.yaml
+          signoff: true