Fix GitHub workflow to start Khoj, connect to Postgres, and upload results

- Do not trigger the CI test workflow when only eval files (tests/evals/**) are updated
khoj-ai · Nov 18, 2024 · a2ccf6f · a2ccf6f
1 parent 7c0fd71
commit a2ccf6f
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 6 deletions.
diff --git a/.github/workflows/run_evals.yml b/.github/workflows/run_evals.yml
@@ -45,9 +45,14 @@ jobs:
         env:
           POSTGRES_PASSWORD: postgres
           POSTGRES_USER: postgres
+          POSTGRES_DB: postgres
         ports:
           - 5432:5432
-        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
 
     steps:
       - uses: actions/checkout@v3
@@ -57,7 +62,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.10
+          python-version: '3.10'
 
       - name: Get App Version
         id: hatch
@@ -88,7 +93,9 @@ jobs:
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
           OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
-          POSTGRES_HOST: postgres
+          KHOJ_ADMIN_EMAIL: khoj
+          KHOJ_ADMIN_PASSWORD: khoj
+          POSTGRES_HOST: localhost
           POSTGRES_PORT: 5432
           POSTGRES_USER: postgres
           POSTGRES_PASSWORD: postgres
@@ -119,4 +126,23 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
-          path: "*_evaluation_results_*.csv"
+          path: |
+            *_evaluation_results_*.csv
+            *_evaluation_summary_*.txt
+
+      - name: Display Results
+        if: always()
+        run: |
+          # Read and display summary
+          echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
+          echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
+          echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
+          echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+
+          # Display in logs too
+          echo "===== EVALUATION RESULTS ====="
+          cat *_evaluation_summary_*.txt
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -5,6 +5,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
@@ -15,6 +16,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml

diff --git a/tests/evals/eval.py b/tests/evals/eval.py
@@ -286,10 +286,23 @@ def main():
     logger.info(f"\nOverall Accuracy: {colored_accuracy}")
     logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")
 
-    # Save results
+    # Save summary to file
+    sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
+    sample_type += " Randomized." if RANDOMIZE else ""
+    summary = (
+        f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
+    )
+    summary_file = args.output.replace(".csv", ".txt") if args.output else None
+    summary_file = (
+        summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    )
+    with open(summary_file, "w") as f:
+        f.write(summary)
+
+    # Save raw results to file
     output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
     df.to_csv(output_file, index=False)
-    logger.info(f"Results saved to {output_file}")
+    logger.info(f"Results saved to {summary_file}, {output_file}")
 
 
 if __name__ == "__main__":