Skip to content

Release Khoj version 1.41.0 #141

Release Khoj version 1.41.0

Release Khoj version 1.41.0 #141

Workflow file for this run

name: eval
on:
# Run on every release
push:
tags:
- "*"
# Allow manual triggers from GitHub UI
workflow_dispatch:
inputs:
khoj_mode:
description: 'Khoj Mode (general/default/research)'
required: true
default: 'default'
type: choice
options:
- general
- default
- research
dataset:
description: 'Dataset to evaluate (frames/simpleqa)'
required: true
default: 'frames'
type: choice
options:
- frames
- simpleqa
- gpqa
- math500
sample_size:
description: 'Number of samples to evaluate'
required: false
default: 200
type: number
sandbox:
description: 'Code sandbox to use'
required: false
default: 'terrarium'
type: choice
options:
- terrarium
- e2b
chat_model:
description: 'Chat model to use'
required: false
default: 'gemini-2.0-flash'
type: string
max_research_iterations:
description: 'Maximum number of iterations in research mode'
required: false
default: 5
type: number
openai_api_key:
description: 'OpenAI API key'
required: false
default: ''
type: string
openai_base_url:
description: 'Base URL of OpenAI compatible API'
required: false
default: ''
type: string
auto_read_webpage:
description: 'Auto read webpage on online search'
required: false
default: 'false'
type: choice
options:
- 'false'
- 'true'
randomize:
description: 'Randomize the sample of questions'
required: false
default: 'true'
type: choice
options:
- 'false'
- 'true'
jobs:
eval:
runs-on: ubuntu-latest
strategy:
matrix:
# Use input from manual trigger if available, else run all combinations
khoj_mode: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.khoj_mode)) || fromJSON('["general", "default", "research"]') }}
dataset: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.dataset)) || fromJSON('["frames", "gpqa"]') }}
services:
postgres:
image: ankane/pgvector
env:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: postgres
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Get App Version
id: hatch
run: |
# Mask relevant workflow inputs as secret early
OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key' $GITHUB_EVENT_PATH)
echo ::add-mask::$OPENAI_API_KEY
echo OPENAI_API_KEY="$OPENAI_API_KEY" >> $GITHUB_ENV
# Get app version from hatch
echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT
- name: ⏬️ Install Dependencies
env:
DEBIAN_FRONTEND: noninteractive
run: |
# install dependencies
sudo apt update && sudo apt install -y git python3-pip libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
# upgrade pip
python -m ensurepip --upgrade && python -m pip install --upgrade pip
# install terrarium for code sandbox
git clone https://github.com/khoj-ai/terrarium.git && cd terrarium && npm install --legacy-peer-deps && mkdir pyodide_cache
- name: ⬇️ Install Application
run: |
sed -i 's/dynamic = \["version"\]/version = "${{ steps.hatch.outputs.version }}"/' pyproject.toml
pip install --upgrade .[dev]
- name: 📝 Run Eval
env:
KHOJ_MODE: ${{ matrix.khoj_mode }}
SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }}
BATCH_SIZE: "20"
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
KHOJ_URL: "http://localhost:42110"
KHOJ_LLM_SEED: "42"
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }}
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }}
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}}
FIRECRAWL_API_KEY: ${{ matrix.dataset != 'math500' && secrets.FIRECRAWL_API_KEY || '' }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
E2B_API_KEY: ${{ inputs.sandbox == 'e2b' && secrets.E2B_API_KEY || '' }}
E2B_TEMPLATE: ${{ vars.E2B_TEMPLATE }}
KHOJ_ADMIN_EMAIL: khoj
KHOJ_ADMIN_PASSWORD: khoj
POSTGRES_HOST: localhost
POSTGRES_PORT: 5432
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: postgres
USE_EMBEDDED_DB: "true"
KHOJ_TELEMETRY_DISABLE: "True" # To disable telemetry for tests
run: |
# Start Khoj server in background
khoj --anonymous-mode --non-interactive &
# Start code sandbox
npm install -g pm2
NODE_ENV=production npm run ci --prefix terrarium
# Wait for server to be ready
timeout=120
while ! curl -s http://localhost:42110/api/health > /dev/null; do
if [ $timeout -le 0 ]; then
echo "Timed out waiting for Khoj server"
exit 1
fi
echo "Waiting for Khoj server..."
sleep 2
timeout=$((timeout-2))
done
# Run evals
python tests/evals/eval.py -d ${{ matrix.dataset }}
- name: Upload Results
if: always() # Upload results even if tests fail
uses: actions/upload-artifact@v4
with:
name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
path: |
*_evaluation_results_*.csv
*_evaluation_summary_*.txt
- name: Display Results
if: always()
run: |
# Read and display summary
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.0-flash' }}" >> $GITHUB_STEP_SUMMARY
echo "- Code Sandbox: ${{ inputs.sandbox || 'terrarium' }}" >> $GITHUB_STEP_SUMMARY
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
# Display in logs too
echo "===== EVALUATION RESULTS ====="
cat *_evaluation_summary_*.txt