Skip to content

Commit b9b6229

Browse files
authored
Merge pull request #1 from epoch-research/better-layering-2
Improved use of layering to reduce image size by 6-10x
2 parents a42c3c8 + ec0f86d commit b9b6229

File tree

12 files changed

+1199
-43
lines changed

12 files changed

+1199
-43
lines changed

.github/workflows/build.yaml

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
1-
2 1
name: Build images
3 2

4 3
env:
5 4
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6 5

7 6
on:
8 7
workflow_dispatch:
9-
8+
inputs:
9+
architecture:
10+
description: 'CPU architecture (x64 or arm64)'
11+
required: true
12+
type: choice
13+
options:
14+
- x64
15+
- arm64
10 16
jobs:
11 17
build:
12-
runs-on: ubuntu-latest
18+
# The script will detect the architecture and build the images for that architecture
19+
runs-on: epoch-research-${{ inputs.architecture }}-32core-128GB-1200GB
13 20
defaults:
14 21
run:
15 22
shell: bash -l {0}
@@ -18,15 +25,23 @@ jobs:
18 25
uses: actions/checkout@v2
19 26
- uses: actions/setup-python@v5
20 27
with:
21-
python-version: '3.9'
28+
python-version: '3.11'
22 29
- name: Install uv
23 30
run: |
24 31
curl -LsSf https://astral.sh/uv/install.sh | sh
25 32
- name: Install dependencies
26 33
run: |
27 34
uv pip install --python ${Python_ROOT_DIR} '.'
28-
- name: Install dev dependencies
29-
run: |
30-
uv pip install --python ${Python_ROOT_DIR} pytest pytest-cov
35+
- name: Login to GitHub Container Registry
36+
uses: docker/login-action@v3
37+
with:
38+
registry: ghcr.io
39+
username: ${{ github.repository_owner }}
40+
password: ${{ secrets.GITHUB_TOKEN }}
31 41
- name: Run build script
32-
run: python build.py
42+
run: python build.py
43+
- name: Store Docker build logs as artifact
44+
uses: actions/upload-artifact@v4
45+
with:
46+
name: build-logs-${{ inputs.architecture }}
47+
path: logs

.github/workflows/pytest.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
34 34
uses: actions/checkout@v2
35 35
- uses: actions/setup-python@v5
36 36
with:
37-
python-version: '3.9'
37+
python-version: '3.11'
38 38
- name: Install uv
39 39
run: |
40 40
curl -LsSf https://astral.sh/uv/install.sh | sh

build.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from swebench.harness import prepare_images
2+
import datasets
3+
4+
5+
def build(dataset_name, repo, limit=None):
6+
dataset = datasets.load_dataset(dataset_name)
7+
split = 'test'
8+
dataset = dataset[split]
9+
10+
if repo:
11+
dataset = dataset.filter(lambda x: x['repo'] == repo)
12+
13+
instance_ids = dataset['instance_id']
14+
15+
if limit:
16+
instance_ids = instance_ids[:limit]
17+
18+
print(f"Building images for {len(instance_ids)} instances")
19+
20+
prepare_images.main(
21+
dataset_name=dataset_name,
22+
split=split,
23+
instance_ids=instance_ids,
24+
max_workers=32,
25+
force_rebuild=False,
26+
open_file_limit=8192,
27+
)
28+
29+
30+
if __name__ == '__main__':
31+
dataset_name = 'princeton-nlp/SWE-bench'
32+
repo: str | None = None
33+
build(dataset_name, repo)

0 commit comments

Comments
 (0)