Add huggingface conversion for AMPLIFY
Signed-off-by: Peter St. John <[email protected]>
pstjohn committed Feb 3, 2025
1 parent 22ba7c5 commit ce8233a
Showing 10 changed files with 654 additions and 32 deletions.
10 changes: 7 additions & 3 deletions Dockerfile
@@ -38,7 +38,7 @@ EOF
# Reinstall TE to avoid debugpy bug in vscode: https://nvbugspro.nvidia.com/bug/5078830
# Pull the latest TE version from https://github.com/NVIDIA/TransformerEngine/releases
# Use the version that matches the pytorch base container.
ARG TE_TAG=v1.13
ARG TE_TAG=2215fa5c7557b66034068816020f9f611019e457
RUN NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi \
pip --disable-pip-version-check --no-cache-dir install \
git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}
@@ -48,10 +48,13 @@ RUN NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi \
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip --disable-pip-version-check --no-cache-dir install \
git+https://github.com/Dao-AILab/[email protected]

# Mamba dependancy installation
# Mamba dependency installation
RUN pip --disable-pip-version-check --no-cache-dir install \
git+https://github.com/state-spaces/[email protected]

ARG XFORMER_ENGINE_TAG=v0.0.29.post1
RUN pip install -v -U git+https://github.com/facebookresearch/xformers.git@${XFORMER_ENGINE_TAG}#egg=xformers

RUN pip install hatchling # needed to install nemo-run
ARG NEMU_RUN_TAG=34259bd3e752fef94045a9a019e4aaf62bd11ce2
RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMU_RUN_TAG}
@@ -100,7 +103,7 @@ COPY ./sub-packages /workspace/bionemo2/sub-packages
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
--mount=type=cache,target=/root/.cache <<EOF
<<EOF
set -eo pipefail

uv pip install maturin --no-build-isolation
@@ -114,6 +117,7 @@ uv pip install --no-build-isolation \
rm -rf ./3rdparty
rm -rf /tmp/*
rm -rf ./sub-packages/bionemo-noodles/target
rm -rf /root/.cache
EOF

# In the devcontainer image, we just copy over the finished `dist-packages` folder from the build image back into the
196 changes: 196 additions & 0 deletions source.txt
@@ -0,0 +1,196 @@
encoder.weight torch.Size([27, 640])
transformer_encoder.0.q.weight torch.Size([640, 640])
transformer_encoder.0.k.weight torch.Size([640, 640])
transformer_encoder.0.v.weight torch.Size([640, 640])
transformer_encoder.0.wo.weight torch.Size([640, 640])
transformer_encoder.0.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.0.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.0.attention_norm.weight torch.Size([640])
transformer_encoder.0.ffn_norm.weight torch.Size([640])
transformer_encoder.1.q.weight torch.Size([640, 640])
transformer_encoder.1.k.weight torch.Size([640, 640])
transformer_encoder.1.v.weight torch.Size([640, 640])
transformer_encoder.1.wo.weight torch.Size([640, 640])
transformer_encoder.1.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.1.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.1.attention_norm.weight torch.Size([640])
transformer_encoder.1.ffn_norm.weight torch.Size([640])
transformer_encoder.2.q.weight torch.Size([640, 640])
transformer_encoder.2.k.weight torch.Size([640, 640])
transformer_encoder.2.v.weight torch.Size([640, 640])
transformer_encoder.2.wo.weight torch.Size([640, 640])
transformer_encoder.2.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.2.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.2.attention_norm.weight torch.Size([640])
transformer_encoder.2.ffn_norm.weight torch.Size([640])
transformer_encoder.3.q.weight torch.Size([640, 640])
transformer_encoder.3.k.weight torch.Size([640, 640])
transformer_encoder.3.v.weight torch.Size([640, 640])
transformer_encoder.3.wo.weight torch.Size([640, 640])
transformer_encoder.3.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.3.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.3.attention_norm.weight torch.Size([640])
transformer_encoder.3.ffn_norm.weight torch.Size([640])
transformer_encoder.4.q.weight torch.Size([640, 640])
transformer_encoder.4.k.weight torch.Size([640, 640])
transformer_encoder.4.v.weight torch.Size([640, 640])
transformer_encoder.4.wo.weight torch.Size([640, 640])
transformer_encoder.4.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.4.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.4.attention_norm.weight torch.Size([640])
transformer_encoder.4.ffn_norm.weight torch.Size([640])
transformer_encoder.5.q.weight torch.Size([640, 640])
transformer_encoder.5.k.weight torch.Size([640, 640])
transformer_encoder.5.v.weight torch.Size([640, 640])
transformer_encoder.5.wo.weight torch.Size([640, 640])
transformer_encoder.5.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.5.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.5.attention_norm.weight torch.Size([640])
transformer_encoder.5.ffn_norm.weight torch.Size([640])
transformer_encoder.6.q.weight torch.Size([640, 640])
transformer_encoder.6.k.weight torch.Size([640, 640])
transformer_encoder.6.v.weight torch.Size([640, 640])
transformer_encoder.6.wo.weight torch.Size([640, 640])
transformer_encoder.6.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.6.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.6.attention_norm.weight torch.Size([640])
transformer_encoder.6.ffn_norm.weight torch.Size([640])
transformer_encoder.7.q.weight torch.Size([640, 640])
transformer_encoder.7.k.weight torch.Size([640, 640])
transformer_encoder.7.v.weight torch.Size([640, 640])
transformer_encoder.7.wo.weight torch.Size([640, 640])
transformer_encoder.7.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.7.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.7.attention_norm.weight torch.Size([640])
transformer_encoder.7.ffn_norm.weight torch.Size([640])
transformer_encoder.8.q.weight torch.Size([640, 640])
transformer_encoder.8.k.weight torch.Size([640, 640])
transformer_encoder.8.v.weight torch.Size([640, 640])
transformer_encoder.8.wo.weight torch.Size([640, 640])
transformer_encoder.8.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.8.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.8.attention_norm.weight torch.Size([640])
transformer_encoder.8.ffn_norm.weight torch.Size([640])
transformer_encoder.9.q.weight torch.Size([640, 640])
transformer_encoder.9.k.weight torch.Size([640, 640])
transformer_encoder.9.v.weight torch.Size([640, 640])
transformer_encoder.9.wo.weight torch.Size([640, 640])
transformer_encoder.9.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.9.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.9.attention_norm.weight torch.Size([640])
transformer_encoder.9.ffn_norm.weight torch.Size([640])
transformer_encoder.10.q.weight torch.Size([640, 640])
transformer_encoder.10.k.weight torch.Size([640, 640])
transformer_encoder.10.v.weight torch.Size([640, 640])
transformer_encoder.10.wo.weight torch.Size([640, 640])
transformer_encoder.10.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.10.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.10.attention_norm.weight torch.Size([640])
transformer_encoder.10.ffn_norm.weight torch.Size([640])
transformer_encoder.11.q.weight torch.Size([640, 640])
transformer_encoder.11.k.weight torch.Size([640, 640])
transformer_encoder.11.v.weight torch.Size([640, 640])
transformer_encoder.11.wo.weight torch.Size([640, 640])
transformer_encoder.11.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.11.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.11.attention_norm.weight torch.Size([640])
transformer_encoder.11.ffn_norm.weight torch.Size([640])
transformer_encoder.12.q.weight torch.Size([640, 640])
transformer_encoder.12.k.weight torch.Size([640, 640])
transformer_encoder.12.v.weight torch.Size([640, 640])
transformer_encoder.12.wo.weight torch.Size([640, 640])
transformer_encoder.12.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.12.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.12.attention_norm.weight torch.Size([640])
transformer_encoder.12.ffn_norm.weight torch.Size([640])
transformer_encoder.13.q.weight torch.Size([640, 640])
transformer_encoder.13.k.weight torch.Size([640, 640])
transformer_encoder.13.v.weight torch.Size([640, 640])
transformer_encoder.13.wo.weight torch.Size([640, 640])
transformer_encoder.13.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.13.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.13.attention_norm.weight torch.Size([640])
transformer_encoder.13.ffn_norm.weight torch.Size([640])
transformer_encoder.14.q.weight torch.Size([640, 640])
transformer_encoder.14.k.weight torch.Size([640, 640])
transformer_encoder.14.v.weight torch.Size([640, 640])
transformer_encoder.14.wo.weight torch.Size([640, 640])
transformer_encoder.14.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.14.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.14.attention_norm.weight torch.Size([640])
transformer_encoder.14.ffn_norm.weight torch.Size([640])
transformer_encoder.15.q.weight torch.Size([640, 640])
transformer_encoder.15.k.weight torch.Size([640, 640])
transformer_encoder.15.v.weight torch.Size([640, 640])
transformer_encoder.15.wo.weight torch.Size([640, 640])
transformer_encoder.15.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.15.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.15.attention_norm.weight torch.Size([640])
transformer_encoder.15.ffn_norm.weight torch.Size([640])
transformer_encoder.16.q.weight torch.Size([640, 640])
transformer_encoder.16.k.weight torch.Size([640, 640])
transformer_encoder.16.v.weight torch.Size([640, 640])
transformer_encoder.16.wo.weight torch.Size([640, 640])
transformer_encoder.16.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.16.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.16.attention_norm.weight torch.Size([640])
transformer_encoder.16.ffn_norm.weight torch.Size([640])
transformer_encoder.17.q.weight torch.Size([640, 640])
transformer_encoder.17.k.weight torch.Size([640, 640])
transformer_encoder.17.v.weight torch.Size([640, 640])
transformer_encoder.17.wo.weight torch.Size([640, 640])
transformer_encoder.17.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.17.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.17.attention_norm.weight torch.Size([640])
transformer_encoder.17.ffn_norm.weight torch.Size([640])
transformer_encoder.18.q.weight torch.Size([640, 640])
transformer_encoder.18.k.weight torch.Size([640, 640])
transformer_encoder.18.v.weight torch.Size([640, 640])
transformer_encoder.18.wo.weight torch.Size([640, 640])
transformer_encoder.18.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.18.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.18.attention_norm.weight torch.Size([640])
transformer_encoder.18.ffn_norm.weight torch.Size([640])
transformer_encoder.19.q.weight torch.Size([640, 640])
transformer_encoder.19.k.weight torch.Size([640, 640])
transformer_encoder.19.v.weight torch.Size([640, 640])
transformer_encoder.19.wo.weight torch.Size([640, 640])
transformer_encoder.19.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.19.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.19.attention_norm.weight torch.Size([640])
transformer_encoder.19.ffn_norm.weight torch.Size([640])
transformer_encoder.20.q.weight torch.Size([640, 640])
transformer_encoder.20.k.weight torch.Size([640, 640])
transformer_encoder.20.v.weight torch.Size([640, 640])
transformer_encoder.20.wo.weight torch.Size([640, 640])
transformer_encoder.20.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.20.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.20.attention_norm.weight torch.Size([640])
transformer_encoder.20.ffn_norm.weight torch.Size([640])
transformer_encoder.21.q.weight torch.Size([640, 640])
transformer_encoder.21.k.weight torch.Size([640, 640])
transformer_encoder.21.v.weight torch.Size([640, 640])
transformer_encoder.21.wo.weight torch.Size([640, 640])
transformer_encoder.21.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.21.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.21.attention_norm.weight torch.Size([640])
transformer_encoder.21.ffn_norm.weight torch.Size([640])
transformer_encoder.22.q.weight torch.Size([640, 640])
transformer_encoder.22.k.weight torch.Size([640, 640])
transformer_encoder.22.v.weight torch.Size([640, 640])
transformer_encoder.22.wo.weight torch.Size([640, 640])
transformer_encoder.22.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.22.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.22.attention_norm.weight torch.Size([640])
transformer_encoder.22.ffn_norm.weight torch.Size([640])
transformer_encoder.23.q.weight torch.Size([640, 640])
transformer_encoder.23.k.weight torch.Size([640, 640])
transformer_encoder.23.v.weight torch.Size([640, 640])
transformer_encoder.23.wo.weight torch.Size([640, 640])
transformer_encoder.23.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.23.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.23.attention_norm.weight torch.Size([640])
transformer_encoder.23.ffn_norm.weight torch.Size([640])
layer_norm_2.weight torch.Size([640])
decoder.weight torch.Size([27, 640])
decoder.bias torch.Size([27])
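
The listing above enumerates the parameter names and shapes of the HuggingFace AMPLIFY checkpoint that the new converter consumes: a 27-token vocabulary, hidden size 640, 24 transformer layers with separate q/k/v/wo projections, and a SwiGLU FFN whose gate/up projection is stored fused as w12. A listing like this can be produced with a few lines of PyTorch; the sketch below assumes the chandar-lab/AMPLIFY_120M checkpoint id and that its custom modeling code (loaded with trust_remote_code=True) exposes the full model, including the decoder head. Adjust both assumptions to the checkpoint actually being converted.

```python
# Sketch: dump parameter names and shapes from the HuggingFace AMPLIFY checkpoint.
# The checkpoint id and Auto class are assumptions; the published AMPLIFY models
# ship custom modeling code, so trust_remote_code=True is typically required.
from transformers import AutoModel

model = AutoModel.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)

with open("source.txt", "w") as f:
    for name, tensor in model.state_dict().items():
        # Produces lines like "encoder.weight torch.Size([27, 640])"
        f.write(f"{name} {tensor.shape}\n")
```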
2 changes: 2 additions & 0 deletions sub-packages/bionemo-amplify/pyproject.toml
@@ -15,7 +15,9 @@ dependencies = [
# internal
'bionemo-core',
'bionemo-llm',
'bionemo-esm2',
# external
# 'xformers'
]

[tool.setuptools.packages.find]
(Diffs for the remaining 7 changed files are not shown.)
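
For the conversion itself, the HuggingFace parameter names listed in source.txt have to be renamed into the target model's naming scheme, and the separate q/k/v projections fused into a single QKV weight. The sketch below only illustrates that kind of state-dict translation using hypothetical Megatron-style target names; the actual mapping is defined by the bionemo-amplify converter added in this commit, and real Megatron checkpoints additionally interleave the q/k/v weights per attention head rather than simply concatenating them.

```python
# Illustrative HF -> Megatron-style state-dict translation for AMPLIFY.
# The target key names are hypothetical; they are not the exact names used by
# the bionemo-amplify converter, and per-head q/k/v interleaving is omitted.
import torch


def convert_amplify_state_dict(hf_sd: dict, n_layers: int = 24) -> dict:
    out = {
        "embedding.word_embeddings.weight": hf_sd["encoder.weight"],
        "lm_head.weight": hf_sd["decoder.weight"],
        "lm_head.bias": hf_sd["decoder.bias"],
        "final_layernorm.weight": hf_sd["layer_norm_2.weight"],
    }
    for i in range(n_layers):
        src, dst = f"transformer_encoder.{i}", f"encoder.layers.{i}"
        # Naive fusion of the separate q/k/v projections into one linear_qkv weight.
        out[f"{dst}.self_attention.linear_qkv.weight"] = torch.cat(
            [hf_sd[f"{src}.q.weight"], hf_sd[f"{src}.k.weight"], hf_sd[f"{src}.v.weight"]]
        )
        out[f"{dst}.self_attention.linear_proj.weight"] = hf_sd[f"{src}.wo.weight"]
        # SwiGLU FFN: w12 is the fused gate/up projection, w3 the down projection.
        out[f"{dst}.mlp.linear_fc1.weight"] = hf_sd[f"{src}.ffn.w12.weight"]
        out[f"{dst}.mlp.linear_fc2.weight"] = hf_sd[f"{src}.ffn.w3.weight"]
        out[f"{dst}.input_layernorm.weight"] = hf_sd[f"{src}.attention_norm.weight"]
        out[f"{dst}.pre_mlp_layernorm.weight"] = hf_sd[f"{src}.ffn_norm.weight"]
    return out
```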
