Remove ADO collection, add GitHub-based dataset screener and candidate verification workflows #2091
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: lints and tests the package, then exercises the evaluation
# pipeline end-to-end with a mock agent (pick a category, fetch entries,
# run one mock evaluation per entry, summarize the results).
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Default token scope for every job; jobs that need more elevate explicitly.
permissions:
  contents: read

env:
  # Single source of truth for where evaluation output is written; it is used
  # by the evaluate step, the artifact upload, and the mock-evaluation job output.
  EVALUATION_RESULTS_DIR: evaluation_results

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Python with UV
        uses: ./.github/actions/setup-python-uv
        with:
          all-extras: true

      - name: Run Ruff
        run: uv run ruff check --output-format=github .

      - name: Run tests with coverage
        run: uv run pytest --cov=src/bcbench --cov-report=term-missing

  # Picks one of the two categories at random, so a given run exercises only
  # one category path.
  select-category:
    runs-on: ubuntu-latest
    outputs:
      category: ${{ steps.random.outputs.category }}
    steps:
      - name: Select random category
        id: random
        shell: pwsh
        run: |
          $categories = @("bug-fix", "test-generation")
          $selected = $categories | Get-Random
          echo "category=$selected" >> $env:GITHUB_OUTPUT

  # Reusable workflow; its `entries` output is consumed below as a JSON array.
  get-entries:
    needs: select-category
    uses: ./.github/workflows/get-entries.yml
    with:
      test-run: true
      category: ${{ needs.select-category.outputs.category }}

  mock-evaluation:
    runs-on: ubuntu-latest
    needs: [get-entries, select-category]
    # Skip when there is nothing to evaluate. The empty-string guard keeps
    # fromJson() in the matrix below from failing on an unset output.
    if: >-
      needs.get-entries.outputs.entries != '' &&
      needs.get-entries.outputs.entries != '[]'
    outputs:
      results-dir: ${{ env.EVALUATION_RESULTS_DIR }}
    strategy:
      # Let every entry run to completion even if one of them fails.
      fail-fast: false
      matrix:
        entry: ${{ fromJson(needs.get-entries.outputs.entries) }}
    name: Test Run for ${{ matrix.entry }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Python with UV
        uses: ./.github/actions/setup-python-uv

      - name: Run mock evaluation for ${{ matrix.entry }}
        # Values are passed through the environment instead of being expanded
        # into the script text, so an unusual entry or category value cannot
        # be interpreted as shell syntax.
        env:
          ENTRY: ${{ matrix.entry }}
          CATEGORY: ${{ needs.select-category.outputs.category }}
        run: >-
          uv run bcbench evaluate mock "$ENTRY"
          --category "$CATEGORY"
          --output-dir "$EVALUATION_RESULTS_DIR"
          --run-id "$GITHUB_RUN_ID"

      - name: Upload mock evaluation results
        uses: actions/upload-artifact@v6
        with:
          # NOTE(review): the entry id doubles as the artifact name, so it must
          # be unique per matrix leg and a valid artifact name - confirm.
          name: ${{ matrix.entry }}
          path: ${{ env.EVALUATION_RESULTS_DIR }}/**/*.jsonl
          retention-days: 1

  summarize-results:
    needs: [mock-evaluation, select-category]
    uses: ./.github/workflows/summarize-results.yml
    # NOTE(review): elevated scopes are presumably required by the reusable
    # workflow; confirm they are still needed when `mock: true`.
    permissions:
      contents: write
      id-token: write
    with:
      results-dir: ${{ needs.mock-evaluation.outputs.results-dir }}
      # The run id stands in for the model name in mock runs.
      model: ${{ github.run_id }}
      agent: "mock-agent"
      mock: true
      category: ${{ needs.select-category.outputs.category }}
    secrets: inherit