Skip to content

Commit

Permalink
chore: working on CIFAR upload to the DB
Browse files Browse the repository at this point in the history
  • Loading branch information
fd0r committed Jan 15, 2024
1 parent d8d5734 commit 59aff6b
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 16 deletions.
26 changes: 16 additions & 10 deletions .github/workflows/cifar_benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,6 @@ jobs:
NUM_SAMPLES=${{ github.event.inputs.num_samples }} python3 ./use_case_examples/cifar/cifar_brevitas_training/evaluate_one_example_fhe.py
python3 ./benchmarks/convert_cifar.py --model-name "16-bits-trained-v0"
- name: Upload results
if: ${{ github.repository == 'zama-ai/concrete-ml-internal' }}
id: upload-results
run: |
curl \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json" \
-d @to_upload.json \
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
- name: Archive raw predictions
uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -177,6 +167,22 @@ jobs:
name: server.zip
path: client_server/server.zip

# Keep this step last so that a failure here cannot prevent
# the artifact uploads above from running
- name: Upload results
id: upload-results
run: |
# Log the json
cat to_upload.json | jq
sleep 1.
# Upload the json to the benchmark database
curl \
-H "Authorization: Bearer ${{ secrets.NEW_ML_PROGRESS_TRACKER_TOKEN }}" \
-H "Content-Type: application/json" \
-d @to_upload.json \
-X POST "${{ secrets.NEW_ML_PROGRESS_TRACKER_URL }}experiment"
stop-runner:
name: Stop EC2 runner
needs: [run-cifar-10, start-cifar-runner]
Expand Down
75 changes: 69 additions & 6 deletions benchmarks/convert_cifar.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
import argparse
import datetime
import json
import logging
import platform
import re
import socket
import subprocess
import uuid
from importlib.metadata import version
from pathlib import Path
from typing import Any, Dict, List, Union

import cpuinfo
import numpy as np
import pandas as pd
import psutil
from convert import get_git_hash, get_git_hash_date, git_iso_to_python_iso, is_git_diff


Expand All @@ -28,6 +36,62 @@ def minimum_bribes(q):
return bribes


def get_size(bytes_count: float, suffix="B"):
    """
    Scale a byte count to a human-readable string using 1024-based units.

    e.g:
    1253656 => '1.20MB'
    1253656678 => '1.17GB'

    Args:
        bytes_count (float): The number of bytes to format.
        suffix (str): The text appended after the scale prefix. Default to "B".

    Returns:
        str: The scaled value with two decimals, followed by the scale prefix and the suffix.
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
        if bytes_count < factor:
            return f"{bytes_count:.2f}{unit}{suffix}"
        bytes_count /= factor

    # Anything that is still too large is reported with the biggest prefix instead of
    # silently falling off the loop and returning None
    return f"{bytes_count:.2f}Y{suffix}"


def get_system_information():
    """Collect a description of the machine the script is running on.

    The values come from the `platform`, `socket`, `psutil` and `cpuinfo` modules. Memory sizes
    ("ram" and "swap") are formatted as strings by `get_size`.

    Returns:
        dict: The collected operating system, CPU and memory properties, indexed by name.
    """
    # From https://stackoverflow.com/questions/3103178/how-to-get-the-system-info-with-python
    uname = platform.uname()

    # NOTE(review): py-cpuinfo appears to omit the fields it could not determine - confirm.
    # Fall back to None instead of raising, so that a missing brand string does not abort
    # the whole conversion of the results
    processor_brand = cpuinfo.get_cpu_info().get("brand_raw")

    # The keys are kept in this order so that the serialized output remains stable
    return {
        "platform": platform.system(),
        "platform-release": platform.release(),
        "platform-version": platform.version(),
        "architecture": platform.machine(),
        "hostname": socket.gethostname(),
        "processor": uname.processor,
        "ram": get_size(psutil.virtual_memory().total),
        "physical_cores": psutil.cpu_count(logical=False),
        "total_cores": psutil.cpu_count(logical=True),
        "processor_brand": processor_brand,
        "machine": uname.machine,
        "system": uname.system,
        "node_name": uname.node,
        "release": uname.release,
        "version": uname.version,
        "swap": get_size(psutil.swap_memory().total),
    }


def get_ec2_metadata():
    """Run the `ec2metadata` command and parse its output into a dictionary.

    This is best effort: if the command cannot be run, fails or returns output that cannot be
    decoded, the error is printed and an empty dictionary is returned.

    Returns:
        dict: The metadata values indexed by their name, both as strings. Non-empty lines that
            do not contain a ": " separator are printed and skipped.
    """
    # Only the command itself is expected to fail, the parsing below cannot raise
    try:
        # The command is fixed and takes no argument, so no shell is needed to run it
        output = subprocess.check_output(["ec2metadata"], encoding="utf-8")
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError) as exception:
        print(exception)
        return {}

    res = {}
    for line in output.split("\n"):
        if not line:
            continue

        # Split on the first separator only, so that values that contain ": " themselves
        # are kept instead of being discarded
        key, separator, value = line.partition(": ")
        if separator:
            res[key] = value
        else:
            print(line)
    return res


def main(model_name):
# Get metrics
results = pd.read_csv("./inference_results.csv")
Expand Down Expand Up @@ -73,14 +137,13 @@ def main(model_name):
# Collect everything
session_data: Dict[str, Union[Dict, List]] = {}

ec2_metadata = get_ec2_metadata()

# Create machine
# We should probably add the platform to the DB too
session_data["machine"] = {
"machine_name": None,
"machine_specs": {
"cpu": None,
"ram": None,
"os": None,
},
"machine_name": ec2_metadata.get("instance-type", socket.gethostname()),
"machine_specs": get_system_information(),
}

# Create experiments
Expand Down

0 comments on commit 59aff6b

Please sign in to comment.