Added github workflow

deeppavlov · Nov 8, 2024 · a8f00a9 · a8f00a9
1 parent df82bf2
commit a8f00a9
Show file tree

Hide file tree

Showing 6 changed files with 93 additions and 19 deletions.
diff --git a/.github/workflows/metrics.yml b/.github/workflows/metrics.yml
@@ -0,0 +1,36 @@
+name: check_metrics
+
+on:
+  push:
+    branches:
+    - dev
+    - master
+    - test/**
+  pull_request:
+    branches:
+    - dev
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/dev' && github.ref != 'refs/heads/master' }}
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: set up python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.9
+
+      - name: setup poetry and install dependencies
+        run: |
+          python -m pip install --upgrade pip poetry
+          python -m poetry install --with lint --no-ansi --no-interaction
+
+      - name: run metrics check
+        run: |
+          python -m poetry run poe metrics
diff --git a/dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json b/dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json
@@ -46,21 +46,5 @@
       "all_paths_sampled_avg": 1.0,
       "all_utterances_present_avg": 0.6666666666666666
     }
-  },
-  "2024-11-08 09:00:40.185875": {
-    "DialogueSampler": {
-      "all_paths_sampled": [
-        true,
-        true,
-        true
-      ],
-      "all_utterances_present": [
-        true,
-        true,
-        false
-      ],
-      "all_paths_sampled_avg": 1.0,
-      "all_utterances_present_avg": 0.6666666666666666
-    }
   }
 }
diff --git a/dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/run_autometrics.py b/dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/run_autometrics.py
@@ -8,6 +8,7 @@
 from chatsky_llm_autoconfig.dialogue import Dialogue
 from chatsky_llm_autoconfig.metrics.automatic_metrics import *
 import datetime
+from colorama import Fore
 
 
 with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/test_data/data.json") as f:
@@ -19,6 +20,7 @@ def run_all_algorithms():
     # Get all registered classes
     algorithms = AlgorithmRegistry.get_all()
     print("Classes to test:", *algorithms.keys(), sep="\n")
+    print("------------------\n\n")
     total_metrics = {}
     for class_ in algorithms:
         class_instance = algorithms[class_]["type"]()
@@ -92,6 +94,15 @@ def compare_results(date, old_data):
                 )
             }
 
+    for algorithm, diff in differences.items():
+        print(f"Algorithm: {algorithm}")
+        for metric, value in diff.items():
+            if value < 0:
+                print(f"{metric}: {Fore.RED}{value}{Fore.RESET}")
+            elif value > 0:
+                print(f"{metric}: {Fore.GREEN}{value}{Fore.RESET}")
+            else:
+                print(f"{metric}: {Fore.YELLOW}{value}{Fore.RESET}")
     return differences
 
 
@@ -102,7 +113,7 @@ def compare_results(date, old_data):
     date = str(datetime.datetime.now())
     new_metrics = {date: run_all_algorithms()}
     old_data.update(new_metrics)
-    print(compare_results(date, old_data))
+    compare_results(date, old_data)
 
     with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json", "w") as f:
         f.write(json.dumps(old_data, indent=2, ensure_ascii=False))
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -11,6 +11,7 @@ chatsky_llm_autoconfig = {path = "dev_packages/chatsky_llm_autoconfig", develop
 tqdm = "^4.66.5"
 poethepoet = "^0.29.0"
 pandas = "^2.2.3"
+colorama = "^0.4.6"
 
 [tool.poetry.group.docs]
 optional = true
@@ -36,6 +37,7 @@ build-backend = "poetry.core.masonry.api"
 
 
 
+
 [tool.poetry.group.lint]
 optional = true
 
@@ -47,6 +49,7 @@ mypy = "*"
 
 
 
+
 [tool.poetry.group.tests]
 optional = true
 
@@ -61,4 +64,5 @@ flake = "scripts.codestyle:_run_flake"
 black = "scripts.codestyle:_run_black(modify=False)"
 format = "scripts.codestyle:_run_black(modify=True)"
 test = "scripts.test:_test"
+metrics = "scripts.metrics:_run_check"
 lint.sequence = ["flake", "black"]
diff --git a/scripts/metrics.py b/scripts/metrics.py
@@ -0,0 +1,39 @@
+import json
+
+def _run_check():
+    """Compare the two most recent runs in results.json and fail on a regression.
+
+    Prints per-algorithm metric differences; an algorithm absent from the previous run is skipped.
+
+    Raises:
+        ValueError: if the summed metric difference of any algorithm is negative.
+    """
+    with open("dev_packages/chatsky_llm_autoconfig/chatsky_llm_autoconfig/autometrics/results/results.json") as f:
+        metrics_data = json.load(f)
+    if len(metrics_data) < 2:
+        # Without a baseline run there is nothing to compare; indexing [-2] would raise IndexError.
+        print("Not enough recorded runs to compare.")
+        return
+    previous_metrics, current_metrics = list(metrics_data.values())[-2:]
+
+    # NOTE(review): "lenght" spelling is kept on purpose - presumably it matches the key stored in results.json; confirm.
+    keys = {
+        "all_paths_sampled_diff": "all_paths_sampled_avg",
+        "all_utterances_present_diff": "all_utterances_present_avg",
+        "all_roles_correct_diff": "all_roles_correct_avg",
+        "is_correct_length_diff": "is_correct_lenght_avg",
+    }
+    failures = []  # collected so every algorithm is printed before the check fails
+    for algorithm, metrics in current_metrics.items():
+        if algorithm not in previous_metrics:
+            continue
+        old = previous_metrics[algorithm]
+        diff = {name: metrics.get(key, 0) - old.get(key, 0) for name, key in keys.items()}
+        diff["total_diff"] = sum(metrics.get(k, 0) for k in keys.values()) - sum(old.get(k, 0) for k in keys.values())
+        print(f"Algorithm: {algorithm}")
+        print(f"Differences: {diff}")
+        if diff["total_diff"] < 0:
+            failures.append(f"Total difference for {algorithm} is negative: {diff['total_diff']}")
+    if failures:
+        raise ValueError("; ".join(failures))
+    print("Everything is fine.")