Add huggingface conversion for AMPLIFY
Signed-off-by: Peter St. John <[email protected]>
Showing 10 changed files with 654 additions and 32 deletions.
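The conversion code maps AMPLIFY's Hugging Face checkpoint onto BioNeMo's internals; the reference listing further down shows the HF-side parameter names and shapes. As a rough, hypothetical sketch of the direction of that mapping (the checkpoint id chandar-lab/AMPLIFY_120M and the renames are assumptions for illustration, not the code from this commit):

# Hypothetical sketch: load the HF AMPLIFY checkpoint and rename a couple of
# keys toward a Megatron-style layout. A real converter would also have to
# fuse the q/k/v projections and handle the SwiGLU w12/w3 split.
from transformers import AutoModel

hf_model = AutoModel.from_pretrained("chandar-lab/AMPLIFY_120M", trust_remote_code=True)

def rename(key: str) -> str:
    # Illustrative renames only; the target names here are assumptions.
    key = key.replace("transformer_encoder.", "encoder.layers.")
    key = key.replace(".wo.", ".self_attention.linear_proj.")
    return key

converted = {rename(k): v for k, v in hf_model.state_dict().items()}
for k, v in list(converted.items())[:5]:
    print(k, tuple(v.shape))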
@@ -38,7 +38,7 @@ EOF
 # Reinstall TE to avoid debugpy bug in vscode: https://nvbugspro.nvidia.com/bug/5078830
 # Pull the latest TE version from https://github.com/NVIDIA/TransformerEngine/releases
 # Use the version that matches the pytorch base container.
-ARG TE_TAG=v1.13
+ARG TE_TAG=2215fa5c7557b66034068816020f9f611019e457
 RUN NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi \
     pip --disable-pip-version-check --no-cache-dir install \
     git+https://github.com/NVIDIA/TransformerEngine.git@${TE_TAG}
@@ -48,10 +48,13 @@ RUN NVTE_FRAMEWORK=pytorch NVTE_WITH_USERBUFFERS=1 MPI_HOME=/usr/local/mpi \
 RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip --disable-pip-version-check --no-cache-dir install \
     git+https://github.com/Dao-AILab/[email protected]

-# Mamba dependancy installation
+# Mamba dependency installation
 RUN pip --disable-pip-version-check --no-cache-dir install \
     git+https://github.com/state-spaces/[email protected]

+ARG XFORMER_ENGINE_TAG=v0.0.29.post1
+RUN pip install -v -U git+https://github.com/facebookresearch/xformers.git@${XFORMER_ENGINE_TAG}#egg=xformers
+
 RUN pip install hatchling # needed to install nemo-run
 ARG NEMU_RUN_TAG=34259bd3e752fef94045a9a019e4aaf62bd11ce2
 RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMU_RUN_TAG}
@@ -100,7 +103,7 @@ COPY ./sub-packages /workspace/bionemo2/sub-packages
 RUN --mount=type=bind,source=./.git,target=./.git \
     --mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
     --mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
-    --mount=type=cache,target=/root/.cache <<EOF
+    <<EOF
 set -eo pipefail

 uv pip install maturin --no-build-isolation

@@ -114,6 +117,7 @@ uv pip install --no-build-isolation \
 rm -rf ./3rdparty
 rm -rf /tmp/*
 rm -rf ./sub-packages/bionemo-noodles/target
+rm -rf /root/.cache
 EOF

 # In the devcontainer image, we just copy over the finished `dist-packages` folder from the build image back into the
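Net effect of the Dockerfile hunks: TransformerEngine is pinned to a specific commit rather than the v1.13 tag (working around the debugpy bug noted in the comment), a pinned xformers build is added, and the BuildKit cache mount is replaced by an explicit rm -rf /root/.cache inside the layer, so package-manager caches never land in the final image. A minimal smoke test one might run in the built container (the import names are the standard ones for these packages; exact versions depend on the pins above):

# Quick import check for the pinned dependencies inside the built image.
import torch
import transformer_engine.pytorch as te  # pinned to commit 2215fa5c... above
import xformers

print("torch:", torch.__version__)
print("xformers:", xformers.__version__)  # expect 0.0.29.post1 per XFORMER_ENGINE_TAG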
@@ -0,0 +1,196 @@
encoder.weight torch.Size([27, 640])
transformer_encoder.0.q.weight torch.Size([640, 640])
transformer_encoder.0.k.weight torch.Size([640, 640])
transformer_encoder.0.v.weight torch.Size([640, 640])
transformer_encoder.0.wo.weight torch.Size([640, 640])
transformer_encoder.0.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.0.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.0.attention_norm.weight torch.Size([640])
transformer_encoder.0.ffn_norm.weight torch.Size([640])
transformer_encoder.1.q.weight torch.Size([640, 640])
transformer_encoder.1.k.weight torch.Size([640, 640])
transformer_encoder.1.v.weight torch.Size([640, 640])
transformer_encoder.1.wo.weight torch.Size([640, 640])
transformer_encoder.1.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.1.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.1.attention_norm.weight torch.Size([640])
transformer_encoder.1.ffn_norm.weight torch.Size([640])
transformer_encoder.2.q.weight torch.Size([640, 640])
transformer_encoder.2.k.weight torch.Size([640, 640])
transformer_encoder.2.v.weight torch.Size([640, 640])
transformer_encoder.2.wo.weight torch.Size([640, 640])
transformer_encoder.2.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.2.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.2.attention_norm.weight torch.Size([640])
transformer_encoder.2.ffn_norm.weight torch.Size([640])
transformer_encoder.3.q.weight torch.Size([640, 640])
transformer_encoder.3.k.weight torch.Size([640, 640])
transformer_encoder.3.v.weight torch.Size([640, 640])
transformer_encoder.3.wo.weight torch.Size([640, 640])
transformer_encoder.3.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.3.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.3.attention_norm.weight torch.Size([640])
transformer_encoder.3.ffn_norm.weight torch.Size([640])
transformer_encoder.4.q.weight torch.Size([640, 640])
transformer_encoder.4.k.weight torch.Size([640, 640])
transformer_encoder.4.v.weight torch.Size([640, 640])
transformer_encoder.4.wo.weight torch.Size([640, 640])
transformer_encoder.4.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.4.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.4.attention_norm.weight torch.Size([640])
transformer_encoder.4.ffn_norm.weight torch.Size([640])
transformer_encoder.5.q.weight torch.Size([640, 640])
transformer_encoder.5.k.weight torch.Size([640, 640])
transformer_encoder.5.v.weight torch.Size([640, 640])
transformer_encoder.5.wo.weight torch.Size([640, 640])
transformer_encoder.5.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.5.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.5.attention_norm.weight torch.Size([640])
transformer_encoder.5.ffn_norm.weight torch.Size([640])
transformer_encoder.6.q.weight torch.Size([640, 640])
transformer_encoder.6.k.weight torch.Size([640, 640])
transformer_encoder.6.v.weight torch.Size([640, 640])
transformer_encoder.6.wo.weight torch.Size([640, 640])
transformer_encoder.6.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.6.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.6.attention_norm.weight torch.Size([640])
transformer_encoder.6.ffn_norm.weight torch.Size([640])
transformer_encoder.7.q.weight torch.Size([640, 640])
transformer_encoder.7.k.weight torch.Size([640, 640])
transformer_encoder.7.v.weight torch.Size([640, 640])
transformer_encoder.7.wo.weight torch.Size([640, 640])
transformer_encoder.7.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.7.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.7.attention_norm.weight torch.Size([640])
transformer_encoder.7.ffn_norm.weight torch.Size([640])
transformer_encoder.8.q.weight torch.Size([640, 640])
transformer_encoder.8.k.weight torch.Size([640, 640])
transformer_encoder.8.v.weight torch.Size([640, 640])
transformer_encoder.8.wo.weight torch.Size([640, 640])
transformer_encoder.8.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.8.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.8.attention_norm.weight torch.Size([640])
transformer_encoder.8.ffn_norm.weight torch.Size([640])
transformer_encoder.9.q.weight torch.Size([640, 640])
transformer_encoder.9.k.weight torch.Size([640, 640])
transformer_encoder.9.v.weight torch.Size([640, 640])
transformer_encoder.9.wo.weight torch.Size([640, 640])
transformer_encoder.9.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.9.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.9.attention_norm.weight torch.Size([640])
transformer_encoder.9.ffn_norm.weight torch.Size([640])
transformer_encoder.10.q.weight torch.Size([640, 640])
transformer_encoder.10.k.weight torch.Size([640, 640])
transformer_encoder.10.v.weight torch.Size([640, 640])
transformer_encoder.10.wo.weight torch.Size([640, 640])
transformer_encoder.10.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.10.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.10.attention_norm.weight torch.Size([640])
transformer_encoder.10.ffn_norm.weight torch.Size([640])
transformer_encoder.11.q.weight torch.Size([640, 640])
transformer_encoder.11.k.weight torch.Size([640, 640])
transformer_encoder.11.v.weight torch.Size([640, 640])
transformer_encoder.11.wo.weight torch.Size([640, 640])
transformer_encoder.11.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.11.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.11.attention_norm.weight torch.Size([640])
transformer_encoder.11.ffn_norm.weight torch.Size([640])
transformer_encoder.12.q.weight torch.Size([640, 640])
transformer_encoder.12.k.weight torch.Size([640, 640])
transformer_encoder.12.v.weight torch.Size([640, 640])
transformer_encoder.12.wo.weight torch.Size([640, 640])
transformer_encoder.12.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.12.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.12.attention_norm.weight torch.Size([640])
transformer_encoder.12.ffn_norm.weight torch.Size([640])
transformer_encoder.13.q.weight torch.Size([640, 640])
transformer_encoder.13.k.weight torch.Size([640, 640])
transformer_encoder.13.v.weight torch.Size([640, 640])
transformer_encoder.13.wo.weight torch.Size([640, 640])
transformer_encoder.13.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.13.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.13.attention_norm.weight torch.Size([640])
transformer_encoder.13.ffn_norm.weight torch.Size([640])
transformer_encoder.14.q.weight torch.Size([640, 640])
transformer_encoder.14.k.weight torch.Size([640, 640])
transformer_encoder.14.v.weight torch.Size([640, 640])
transformer_encoder.14.wo.weight torch.Size([640, 640])
transformer_encoder.14.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.14.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.14.attention_norm.weight torch.Size([640])
transformer_encoder.14.ffn_norm.weight torch.Size([640])
transformer_encoder.15.q.weight torch.Size([640, 640])
transformer_encoder.15.k.weight torch.Size([640, 640])
transformer_encoder.15.v.weight torch.Size([640, 640])
transformer_encoder.15.wo.weight torch.Size([640, 640])
transformer_encoder.15.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.15.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.15.attention_norm.weight torch.Size([640])
transformer_encoder.15.ffn_norm.weight torch.Size([640])
transformer_encoder.16.q.weight torch.Size([640, 640])
transformer_encoder.16.k.weight torch.Size([640, 640])
transformer_encoder.16.v.weight torch.Size([640, 640])
transformer_encoder.16.wo.weight torch.Size([640, 640])
transformer_encoder.16.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.16.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.16.attention_norm.weight torch.Size([640])
transformer_encoder.16.ffn_norm.weight torch.Size([640])
transformer_encoder.17.q.weight torch.Size([640, 640])
transformer_encoder.17.k.weight torch.Size([640, 640])
transformer_encoder.17.v.weight torch.Size([640, 640])
transformer_encoder.17.wo.weight torch.Size([640, 640])
transformer_encoder.17.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.17.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.17.attention_norm.weight torch.Size([640])
transformer_encoder.17.ffn_norm.weight torch.Size([640])
transformer_encoder.18.q.weight torch.Size([640, 640])
transformer_encoder.18.k.weight torch.Size([640, 640])
transformer_encoder.18.v.weight torch.Size([640, 640])
transformer_encoder.18.wo.weight torch.Size([640, 640])
transformer_encoder.18.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.18.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.18.attention_norm.weight torch.Size([640])
transformer_encoder.18.ffn_norm.weight torch.Size([640])
transformer_encoder.19.q.weight torch.Size([640, 640])
transformer_encoder.19.k.weight torch.Size([640, 640])
transformer_encoder.19.v.weight torch.Size([640, 640])
transformer_encoder.19.wo.weight torch.Size([640, 640])
transformer_encoder.19.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.19.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.19.attention_norm.weight torch.Size([640])
transformer_encoder.19.ffn_norm.weight torch.Size([640])
transformer_encoder.20.q.weight torch.Size([640, 640])
transformer_encoder.20.k.weight torch.Size([640, 640])
transformer_encoder.20.v.weight torch.Size([640, 640])
transformer_encoder.20.wo.weight torch.Size([640, 640])
transformer_encoder.20.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.20.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.20.attention_norm.weight torch.Size([640])
transformer_encoder.20.ffn_norm.weight torch.Size([640])
transformer_encoder.21.q.weight torch.Size([640, 640])
transformer_encoder.21.k.weight torch.Size([640, 640])
transformer_encoder.21.v.weight torch.Size([640, 640])
transformer_encoder.21.wo.weight torch.Size([640, 640])
transformer_encoder.21.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.21.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.21.attention_norm.weight torch.Size([640])
transformer_encoder.21.ffn_norm.weight torch.Size([640])
transformer_encoder.22.q.weight torch.Size([640, 640])
transformer_encoder.22.k.weight torch.Size([640, 640])
transformer_encoder.22.v.weight torch.Size([640, 640])
transformer_encoder.22.wo.weight torch.Size([640, 640])
transformer_encoder.22.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.22.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.22.attention_norm.weight torch.Size([640])
transformer_encoder.22.ffn_norm.weight torch.Size([640])
transformer_encoder.23.q.weight torch.Size([640, 640])
transformer_encoder.23.k.weight torch.Size([640, 640])
transformer_encoder.23.v.weight torch.Size([640, 640])
transformer_encoder.23.wo.weight torch.Size([640, 640])
transformer_encoder.23.ffn.w12.weight torch.Size([3424, 640])
transformer_encoder.23.ffn.w3.weight torch.Size([640, 1712])
transformer_encoder.23.attention_norm.weight torch.Size([640])
transformer_encoder.23.ffn_norm.weight torch.Size([640])
layer_norm_2.weight torch.Size([640])
decoder.weight torch.Size([27, 640])
decoder.bias torch.Size([27])
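The shapes above pin down the architecture: 24 transformer_encoder layers at hidden size 640, a 27-token vocabulary shared by the encoder embedding and the decoder head, and a SwiGLU feed-forward whose fused gate-and-up projection w12 is exactly twice the inner width of w3 (3424 = 2 x 1712). A small back-of-the-envelope check, using only numbers read off the listing:

# Parameter count implied by the listing (no checkpoint needed).
hidden, vocab, n_layers, ffn_inner = 640, 27, 24, 1712
assert 2 * ffn_inner == 3424  # w12 fuses the SwiGLU gate and up projections

per_layer = (
    4 * hidden * hidden    # q, k, v, wo
    + 3424 * hidden        # ffn.w12
    + hidden * ffn_inner   # ffn.w3
    + 2 * hidden           # attention_norm, ffn_norm
)
# encoder.weight + decoder.weight + decoder.bias + layer_norm_2.weight + layers
total = 2 * vocab * hidden + vocab + hidden + n_layers * per_layer
print(f"~{total / 1e6:.0f}M parameters")  # about 118M, the AMPLIFY 120M scale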