Skip to content

Commit

Permalink
Fix github workflow to start Khoj, connect to PG and upload results
Browse files Browse the repository at this point in the history
- Do not trigger tests to run in ci on update to evals
  • Loading branch information
debanjum committed Nov 18, 2024
1 parent 7c0fd71 commit a2ccf6f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 6 deletions.
34 changes: 30 additions & 4 deletions .github/workflows/run_evals.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@ jobs:
env:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: postgres
ports:
- 5432:5432
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
Expand All @@ -57,7 +62,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.10
python-version: '3.10'

- name: Get App Version
id: hatch
Expand Down Expand Up @@ -88,7 +93,9 @@ jobs:
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
POSTGRES_HOST: postgres
KHOJ_ADMIN_EMAIL: khoj
KHOJ_ADMIN_PASSWORD: khoj
POSTGRES_HOST: localhost
POSTGRES_PORT: 5432
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
Expand Down Expand Up @@ -119,4 +126,23 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
path: "*_evaluation_results_*.csv"
path: |
*_evaluation_results_*.csv
*_evaluation_summary_*.txt
- name: Display Results
if: always()
run: |
# Read and display summary
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
# Display in logs too
echo "===== EVALUATION RESULTS ====="
cat *_evaluation_summary_*.txt
2 changes: 2 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
paths:
- src/khoj/**
- tests/**
- '!tests/evals/**'
- config/**
- pyproject.toml
- .pre-commit-config.yml
Expand All @@ -15,6 +16,7 @@ on:
paths:
- src/khoj/**
- tests/**
- '!tests/evals/**'
- config/**
- pyproject.toml
- .pre-commit-config.yml
Expand Down
17 changes: 15 additions & 2 deletions tests/evals/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,10 +286,23 @@ def main():
logger.info(f"\nOverall Accuracy: {colored_accuracy}")
logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")

# Save results
# Save summary to file
sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
sample_type += " Randomized." if RANDOMIZE else ""
summary = (
f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
)
summary_file = args.output.replace(".csv", ".txt") if args.output else None
summary_file = (
summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
)
with open(summary_file, "w") as f:
f.write(summary)

# Save raw results to file
output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
df.to_csv(output_file, index=False)
logger.info(f"Results saved to {output_file}")
logger.info(f"Results saved to {summary_file}, {output_file}")


if __name__ == "__main__":
Expand Down

0 comments on commit a2ccf6f

Please sign in to comment.