#!/bin/bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
#
# Provenance: recovered from the patch "[PATCH] Create download.sh"
#   commit 8622d9352eeb275f7b88e7e22fb4233d8362d85c
#   From:  Esmail <113830751+Esmail-ibraheem@users.noreply.github.com>
#   Date:  Wed, 15 May 2024 12:17:11 +0300
# The mail envelope, diff headers and leading "+" markers were removed so the
# file is an executable script again.
#
# Interactive downloader. Prompts for the presigned URL (which contains a
# literal '*' placeholder for the file path) and for a comma-separated list of
# model sizes, then downloads the license, use policy, tokenizer and the
# selected model shards into TARGET_FOLDER, checking each md5 checklist.
#
# Usage: ./download.sh
# Requires: bash, wget; md5sum is used for verification when available.

set -euo pipefail

readonly TARGET_FOLDER="."  # where all files should end up
readonly ALL_MODELS="7B,13B,70B,7B-chat,13B-chat,70B-chat"

PRESIGNED_URL=""
MODEL_SIZE=""

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf 'download.sh: %s\n' "$*" >&2
  exit 1
}

#######################################
# Build the download URL for one file by replacing the first '*' in
# PRESIGNED_URL with the given relative path.
# Globals:   PRESIGNED_URL (read)
# Arguments: $1 - path of the file relative to the download root
# Outputs:   the resulting URL on stdout
#######################################
presigned_url_for() {
  # Split around the first '*' instead of using ${var/pat/rep}: the replacement
  # text is then never re-interpreted by the shell (e.g. '&' handling in
  # newer bash versions).
  printf '%s%s%s' "${PRESIGNED_URL%%\**}" "$1" "${PRESIGNED_URL#*\*}"
}

#######################################
# Download one file into TARGET_FOLDER, aborting the script on failure.
# Globals:   TARGET_FOLDER (read), PRESIGNED_URL (read)
# Arguments: $1  - path of the file relative to the download root
#            $2+ - extra options passed through to wget
#######################################
fetch() {
  local rel_path=$1
  shift
  wget "$@" -O "${TARGET_FOLDER}/${rel_path}" -- "$(presigned_url_for "${rel_path}")" \
    || die "failed to download ${rel_path}"
}

#######################################
# Verify the files listed in an md5 checklist; abort on any mismatch.
# If md5sum is not installed, a warning is printed and verification is
# skipped so that the downloads themselves still complete.
# Arguments: $1 - directory holding the checklist and the listed files
#            $2 - checklist file name inside that directory
#######################################
verify_checksums() {
  local dir=$1
  local checklist=$2

  if ! command -v md5sum >/dev/null 2>&1; then
    printf 'download.sh: md5sum not found; skipping verification of %s\n' \
      "${dir}/${checklist}" >&2
    return 0
  fi

  # Subshell keeps the cd from leaking into the rest of the script.
  (cd -- "${dir}" && md5sum -c -- "${checklist}") \
    || die "checksum verification failed for ${dir}/${checklist}"
}

#######################################
# Map a model size label to its remote directory and highest shard index.
# Arguments: $1 - size label (7B, 13B, 70B, 7B-chat, 13B-chat, 70B-chat)
# Outputs:   "<model_path> <last_shard_index>" on stdout
# Returns:   0 for a known label, 1 otherwise
#######################################
model_spec() {
  case "$1" in
    7B)       printf '%s %s' "llama-2-7b" 0 ;;
    7B-chat)  printf '%s %s' "llama-2-7b-chat" 0 ;;
    13B)      printf '%s %s' "llama-2-13b" 1 ;;
    13B-chat) printf '%s %s' "llama-2-13b-chat" 1 ;;
    70B)      printf '%s %s' "llama-2-70b" 7 ;;
    70B-chat) printf '%s %s' "llama-2-70b-chat" 7 ;;
    *)        return 1 ;;
  esac
}

#######################################
# Prompt for input, validate it, then download and verify everything.
# Globals: PRESIGNED_URL, MODEL_SIZE (written); TARGET_FOLDER, ALL_MODELS (read)
#######################################
main() {
  local m spec model_path last_shard shard_id s
  local -a models=()

  command -v wget >/dev/null 2>&1 || die "wget is required but was not found in PATH"

  # '|| true': on EOF read still stores what it got; validation follows below.
  read -r -p "Enter the URL from email: " PRESIGNED_URL || true
  echo ""
  read -r -p "Enter the list of models to download without spaces (7B,13B,70B,7B-chat,13B-chat,70B-chat), or press Enter for all: " MODEL_SIZE || true

  # Without the placeholder every request would fetch the very same URL.
  [[ "${PRESIGNED_URL}" == *'*'* ]] \
    || die "the URL must contain the '*' placeholder from the email"

  if [[ -z "${MODEL_SIZE}" ]]; then
    MODEL_SIZE=${ALL_MODELS}
  fi

  # Commas become spaces, then read -a splits on whitespace without globbing.
  read -r -a models <<< "${MODEL_SIZE//,/ }"
  (( ${#models[@]} > 0 )) || die "no models selected"

  # Reject unknown labels before anything is downloaded.
  for m in "${models[@]}"; do
    model_spec "${m}" >/dev/null \
      || die "unknown model '${m}' (expected one of: ${ALL_MODELS})"
  done

  mkdir -p -- "${TARGET_FOLDER}"

  echo "Downloading LICENSE and Acceptable Usage Policy"
  fetch "LICENSE"
  fetch "USE_POLICY.md"

  echo "Downloading tokenizer"
  fetch "tokenizer.model"
  fetch "tokenizer_checklist.chk"
  verify_checksums "${TARGET_FOLDER}" "tokenizer_checklist.chk"

  for m in "${models[@]}"; do
    spec=$(model_spec "${m}") || die "unknown model '${m}'"
    model_path=${spec% *}
    last_shard=${spec#* }

    echo "Downloading ${model_path}"
    mkdir -p -- "${TARGET_FOLDER}/${model_path}"

    # Shards are named consolidated.00.pth .. consolidated.NN.pth.
    for ((s = 0; s <= last_shard; s++)); do
      printf -v shard_id '%02d' "${s}"
      fetch "${model_path}/consolidated.${shard_id}.pth" \
        --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 0 --continue
    done

    fetch "${model_path}/params.json"
    fetch "${model_path}/checklist.chk"
    echo "Checking checksums"
    verify_checksums "${TARGET_FOLDER}/${model_path}" "checklist.chk"
  done
}

main "$@"