Release Khoj version 1.41.0 #141
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: eval | |
on: | |
# Run on every release | |
push: | |
tags: | |
- "*" | |
# Allow manual triggers from GitHub UI | |
workflow_dispatch: | |
inputs: | |
khoj_mode: | |
description: 'Khoj Mode (general/default/research)' | |
required: true | |
default: 'default' | |
type: choice | |
options: | |
- general | |
- default | |
- research | |
dataset: | |
description: 'Dataset to evaluate (frames/simpleqa)' | |
required: true | |
default: 'frames' | |
type: choice | |
options: | |
- frames | |
- simpleqa | |
- gpqa | |
- math500 | |
sample_size: | |
description: 'Number of samples to evaluate' | |
required: false | |
default: 200 | |
type: number | |
sandbox: | |
description: 'Code sandbox to use' | |
required: false | |
default: 'terrarium' | |
type: choice | |
options: | |
- terrarium | |
- e2b | |
chat_model: | |
description: 'Chat model to use' | |
required: false | |
default: 'gemini-2.0-flash' | |
type: string | |
max_research_iterations: | |
description: 'Maximum number of iterations in research mode' | |
required: false | |
default: 5 | |
type: number | |
openai_api_key: | |
description: 'OpenAI API key' | |
required: false | |
default: '' | |
type: string | |
openai_base_url: | |
description: 'Base URL of OpenAI compatible API' | |
required: false | |
default: '' | |
type: string | |
auto_read_webpage: | |
description: 'Auto read webpage on online search' | |
required: false | |
default: 'false' | |
type: choice | |
options: | |
- 'false' | |
- 'true' | |
randomize: | |
description: 'Randomize the sample of questions' | |
required: false | |
default: 'true' | |
type: choice | |
options: | |
- 'false' | |
- 'true' | |
jobs: | |
eval: | |
runs-on: ubuntu-latest | |
strategy: | |
matrix: | |
# Use input from manual trigger if available, else run all combinations | |
khoj_mode: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.khoj_mode)) || fromJSON('["general", "default", "research"]') }} | |
dataset: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.dataset)) || fromJSON('["frames", "gpqa"]') }} | |
services: | |
postgres: | |
image: ankane/pgvector | |
env: | |
POSTGRES_PASSWORD: postgres | |
POSTGRES_USER: postgres | |
POSTGRES_DB: postgres | |
ports: | |
- 5432:5432 | |
options: >- | |
--health-cmd pg_isready | |
--health-interval 10s | |
--health-timeout 5s | |
--health-retries 5 | |
steps: | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Set up Python | |
uses: actions/setup-python@v4 | |
with: | |
python-version: '3.10' | |
- name: Get App Version | |
id: hatch | |
run: | | |
# Mask relevant workflow inputs as secret early | |
OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key' $GITHUB_EVENT_PATH) | |
echo ::add-mask::$OPENAI_API_KEY | |
echo OPENAI_API_KEY="$OPENAI_API_KEY" >> $GITHUB_ENV | |
# Get app version from hatch | |
echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT | |
- name: ⏬️ Install Dependencies | |
env: | |
DEBIAN_FRONTEND: noninteractive | |
run: | | |
# install dependencies | |
sudo apt update && sudo apt install -y git python3-pip libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6 | |
# upgrade pip | |
python -m ensurepip --upgrade && python -m pip install --upgrade pip | |
# install terrarium for code sandbox | |
git clone https://github.com/khoj-ai/terrarium.git && cd terrarium && npm install --legacy-peer-deps && mkdir pyodide_cache | |
- name: ⬇️ Install Application | |
run: | | |
sed -i 's/dynamic = \["version"\]/version = "${{ steps.hatch.outputs.version }}"/' pyproject.toml | |
pip install --upgrade .[dev] | |
- name: 📝 Run Eval | |
env: | |
KHOJ_MODE: ${{ matrix.khoj_mode }} | |
SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }} | |
BATCH_SIZE: "20" | |
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }} | |
KHOJ_URL: "http://localhost:42110" | |
KHOJ_LLM_SEED: "42" | |
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }} | |
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }} | |
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }} | |
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} | |
OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }} | |
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }} | |
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}} | |
FIRECRAWL_API_KEY: ${{ matrix.dataset != 'math500' && secrets.FIRECRAWL_API_KEY || '' }} | |
HF_TOKEN: ${{ secrets.HF_TOKEN }} | |
E2B_API_KEY: ${{ inputs.sandbox == 'e2b' && secrets.E2B_API_KEY || '' }} | |
E2B_TEMPLATE: ${{ vars.E2B_TEMPLATE }} | |
KHOJ_ADMIN_EMAIL: khoj | |
KHOJ_ADMIN_PASSWORD: khoj | |
POSTGRES_HOST: localhost | |
POSTGRES_PORT: 5432 | |
POSTGRES_USER: postgres | |
POSTGRES_PASSWORD: postgres | |
POSTGRES_DB: postgres | |
USE_EMBEDDED_DB: "true" | |
KHOJ_TELEMETRY_DISABLE: "True" # To disable telemetry for tests | |
run: | | |
# Start Khoj server in background | |
khoj --anonymous-mode --non-interactive & | |
# Start code sandbox | |
npm install -g pm2 | |
NODE_ENV=production npm run ci --prefix terrarium | |
# Wait for server to be ready | |
timeout=120 | |
while ! curl -s http://localhost:42110/api/health > /dev/null; do | |
if [ $timeout -le 0 ]; then | |
echo "Timed out waiting for Khoj server" | |
exit 1 | |
fi | |
echo "Waiting for Khoj server..." | |
sleep 2 | |
timeout=$((timeout-2)) | |
done | |
# Run evals | |
python tests/evals/eval.py -d ${{ matrix.dataset }} | |
- name: Upload Results | |
if: always() # Upload results even if tests fail | |
uses: actions/upload-artifact@v4 | |
with: | |
name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }} | |
path: | | |
*_evaluation_results_*.csv | |
*_evaluation_summary_*.txt | |
- name: Display Results | |
if: always() | |
run: | | |
# Read and display summary | |
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY | |
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY | |
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY | |
echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.0-flash' }}" >> $GITHUB_STEP_SUMMARY | |
echo "- Code Sandbox: ${{ inputs.sandbox || 'terrarium' }}" >> $GITHUB_STEP_SUMMARY | |
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY | |
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY | |
echo "" >> $GITHUB_STEP_SUMMARY | |
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY | |
# Display in logs too | |
echo "===== EVALUATION RESULTS =====" | |
cat *_evaluation_summary_*.txt |