Add test for multivector with recall and qps
Add one test case that validates recall and QPS after the node is rebooted.

Signed-off-by: Rodrigo Lopez <[email protected]>
rlmanrique committed Feb 20, 2025
1 parent 8556f19 commit a835dfd
Showing 10 changed files with 206 additions and 45 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/tests.yaml
@@ -97,6 +97,20 @@ jobs:
first: ${{ needs.real-version-in-tag.outputs.real_version }}
second: "1.26.0"
operator: ">="
newer-or-equal-than-1_29:
name: "Check if the version is newer than or equal to 1.29"
needs: real-version-in-tag
runs-on: ubuntu-latest
outputs:
check: ${{ steps.semver_compare.outputs.result }}
steps:
- name: Semver Compare
id: semver_compare
uses: fabriziocacicia/[email protected]
with:
first: ${{ needs.real-version-in-tag.outputs.real_version }}
second: "1.29.0"
operator: ">="
filter-memory-leak:
name: Filter (cache) memory leak when querying while importing
if: ${{ github.event.inputs.test_to_run == 'filter-memory-leak' || github.event.inputs.test_to_run == '' }}
@@ -341,6 +355,7 @@ jobs:
DISTANCE: l2-squared
REQUIRED_RECALL: 0.999
PERSISTENCE_LSM_ACCESS_STRATEGY: ${{inputs.lsm_access_strategy}}
BOOT_DISK_SIZE: 20GB
steps:
- uses: actions/checkout@v3
- name: Login to Docker Hub
@@ -374,6 +389,7 @@ jobs:
DISTANCE: l2-squared
REQUIRED_RECALL: 0.992
PERSISTENCE_LSM_ACCESS_STRATEGY: ${{inputs.lsm_access_strategy}}
BOOT_DISK_SIZE: 20GB
steps:
- uses: actions/checkout@v3
- name: Login to Docker Hub
@@ -397,6 +413,42 @@ jobs:
path: 'results'
destination: 'ann-pipelines/github-action-runs'
glob: '*.json'
ann-benchmarks-multivector-gcp:
name: "[bench GCP] MULTIVECTOR3M"
needs: [newer-or-equal-than-1_29]
if: ${{ (needs.newer-or-equal-than-1_29.outputs.check == 'true') && (github.event.inputs.test_to_run == 'ann-benchmarks-multivector-gcp' || github.event.inputs.test_to_run == '') }}
runs-on: ubuntu-latest
timeout-minutes: 60
env:
DATASET: lotte-recreation-reduced-vl
DISTANCE: dot
REQUIRED_RECALL: 0.992
PERSISTENCE_LSM_ACCESS_STRATEGY: ${{inputs.lsm_access_strategy}}
BOOT_DISK_SIZE: 20GB
MULTIVECTOR_DATASET: true
steps:
- uses: actions/checkout@v3
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{secrets.DOCKER_USERNAME}}
password: ${{secrets.DOCKER_PASSWORD}}
- id: 'gcs_auth'
name: 'Authenticate to Google Cloud'
uses: 'google-github-actions/auth@v1'
with:
credentials_json: ${{secrets.GCP_SERVICE_ACCOUNT_BENCHMARKS}}
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v1'
- name: Run chaos test
if: always()
run: ./ann_benchmark_gcp.sh
- id: 'upload-files'
uses: 'google-github-actions/upload-cloud-storage@v1'
with:
path: 'results'
destination: 'ann-pipelines/github-action-runs'
glob: '*.json'
batch-import-many-classes:
name: One class receives long and expensive batches, user tries to create and delete 100s of classes in parallel
if: ${{ github.event.inputs.test_to_run == 'batch-import-many-classes' || github.event.inputs.test_to_run == '' }}
21 changes: 18 additions & 3 deletions ann_benchmark.sh
@@ -4,6 +4,7 @@ set -e

dataset=${DATASET:-"sift-128-euclidean"}
distance=${DISTANCE:-"l2-squared"}
multivector=${MULTIVECTOR_DATASET:-"false"}

function wait_weaviate() {
echo "Wait for Weaviate to be ready"
@@ -37,11 +38,25 @@ mkdir -p datasets
echo "Datasets exists locally"
else
echo "Downloading dataset"
curl -LO http://ann-benchmarks.com/${dataset}.hdf5
if [ "$multivector" = true ]; then
echo "Downloading multivector dataset"
curl -LO https://storage.googleapis.com/ann-datasets/custom/Multivector/${dataset}.hdf5
else
echo "Downloading single vector dataset"
curl -LO http://ann-benchmarks.com/${dataset}.hdf5
fi
fi

)
docker run --network host -t -v "$PWD/results:/workdir/results" -v "$PWD/datasets:/datasets" ann_benchmarks python3 run.py -v /datasets/${dataset}.hdf5 -d $distance -m 32 --labels "pq=false,after_restart=false,weaviate_version=$WEAVIATE_VERSION,cloud_provider=$CLOUD_PROVIDER,machine_type=$MACHINE_TYPE,os=$OS"

if [ "$multivector" = true ]; then
multivector_flag="-mv"
else
multivector_flag=""
fi

docker run --network host -t -v "$PWD/results:/workdir/results" -v "$PWD/datasets:/datasets" ann_benchmarks python3 run.py $multivector_flag -v /datasets/${dataset}.hdf5 -d $distance -m 32 --labels "pq=false,after_restart=false,weaviate_version=$WEAVIATE_VERSION,cloud_provider=$CLOUD_PROVIDER,machine_type=$MACHINE_TYPE,os=$OS"

echo "Initial run complete, now restart Weaviate"

@@ -53,7 +68,7 @@ echo "Weaviate ready, wait 30s for caches to be hot"
sleep 30

echo "Second run (query only)"
docker run --network host -t -v "$PWD/results:/workdir/results" -v "$PWD/datasets:/datasets" ann_benchmarks python3 run.py -v /datasets/${dataset}.hdf5 -d $distance -m 32 --query-only --labels "pq=false,after_restart=true,weaviate_version=$WEAVIATE_VERSION,cloud_provider=$CLOUD_PROVIDER,machine_type=$MACHINE_TYPE,os=$OS"
docker run --network host -t -v "$PWD/results:/workdir/results" -v "$PWD/datasets:/datasets" ann_benchmarks python3 run.py $multivector_flag -v /datasets/${dataset}.hdf5 -d $distance -m 32 --query-only --labels "pq=false,after_restart=true,weaviate_version=$WEAVIATE_VERSION,cloud_provider=$CLOUD_PROVIDER,machine_type=$MACHINE_TYPE,os=$OS"

docker run --network host -t -v "$PWD/datasets:/datasets" \
-v "$PWD/results:/workdir/results" \
7 changes: 5 additions & 2 deletions ann_benchmark_gcp.sh
@@ -4,14 +4,17 @@ set -e

ZONE=${ZONE:-"us-central1-a"}
MACHINE_TYPE=${MACHINE_TYPE:-"n2-standard-8"}
BOOT_DISK_SIZE=${BOOT_DISK_SIZE:-"10GB"}
CLOUD_PROVIDER="gcp"
OS="ubuntu-2204-lts"
MULTIVECTOR_DATASET=${MULTIVECTOR_DATASET:-"false"}

instance="benchmark-$(uuidgen | tr [:upper:] [:lower:])"

gcloud compute instances create $instance \
--image-family=$OS --image-project=ubuntu-os-cloud \
--machine-type=$MACHINE_TYPE --zone $ZONE
--machine-type=$MACHINE_TYPE --zone $ZONE \
--boot-disk-size=$BOOT_DISK_SIZE

function cleanup {
gcloud compute instances delete $instance --zone $ZONE --quiet
@@ -43,6 +46,6 @@ gcloud compute ssh --zone $ZONE $instance -- "mkdir -p ~/apps/"
gcloud compute scp --zone $ZONE --recurse apps/ann-benchmarks "$instance:~/apps/"
gcloud compute scp --zone $ZONE --recurse apps/weaviate-no-restart-on-crash/ "$instance:~/apps/"
gcloud compute scp --zone $ZONE --recurse ann_benchmark.sh "$instance:~"
gcloud compute ssh --zone $ZONE $instance -- "DATASET=$DATASET DISTANCE=$DISTANCE REQUIRED_RECALL=$REQUIRED_RECALL WEAVIATE_VERSION=$WEAVIATE_VERSION MACHINE_TYPE=$MACHINE_TYPE CLOUD_PROVIDER=$CLOUD_PROVIDER OS=$OS bash ann_benchmark.sh"
gcloud compute ssh --zone $ZONE $instance -- "MULTIVECTOR_DATASET=$MULTIVECTOR_DATASET DATASET=$DATASET DISTANCE=$DISTANCE REQUIRED_RECALL=$REQUIRED_RECALL WEAVIATE_VERSION=$WEAVIATE_VERSION MACHINE_TYPE=$MACHINE_TYPE CLOUD_PROVIDER=$CLOUD_PROVIDER OS=$OS bash ann_benchmark.sh"
mkdir -p results
gcloud compute scp --zone $ZONE --recurse "$instance:~/results/*.json" results/
7 changes: 5 additions & 2 deletions ann_benchmark_quantization_gcp.sh
@@ -4,14 +4,17 @@ set -e

ZONE=${ZONE:-"us-central1-a"}
MACHINE_TYPE=${MACHINE_TYPE:-"n2-standard-8"}
BOOT_DISK_SIZE=${BOOT_DISK_SIZE:-"10GB"}
MULTIVECTOR_DATASET=${MULTIVECTOR_DATASET:-"false"}
export CLOUD_PROVIDER="gcp"
export OS="ubuntu-2204-lts"

instance="benchmark-$(uuidgen | tr [:upper:] [:lower:])"

gcloud compute instances create $instance \
--image-family=$OS --image-project=ubuntu-os-cloud \
--machine-type=$MACHINE_TYPE --zone $ZONE
--machine-type=$MACHINE_TYPE --zone $ZONE \
--boot-disk-size=$BOOT_DISK_SIZE

function cleanup {
gcloud compute instances delete $instance --quiet --zone $ZONE
@@ -43,6 +46,6 @@ gcloud compute ssh --zone $ZONE $instance -- "mkdir -p ~/apps/"
gcloud compute scp --zone $ZONE --recurse apps/ann-benchmarks "$instance:~/apps/"
gcloud compute scp --zone $ZONE --recurse apps/weaviate-no-restart-on-crash/ "$instance:~/apps/"
gcloud compute scp --zone $ZONE --recurse ann_benchmark_quantization.sh "$instance:~"
gcloud compute ssh --zone $ZONE $instance -- "DATASET=$DATASET DISTANCE=$DISTANCE REQUIRED_RECALL=$REQUIRED_RECALL QUANTIZATION=$QUANTIZATION WEAVIATE_VERSION=$WEAVIATE_VERSION MACHINE_TYPE=$MACHINE_TYPE CLOUD_PROVIDER=$CLOUD_PROVIDER OS=$OS bash ann_benchmark_quantization.sh"
gcloud compute ssh --zone $ZONE $instance -- "MULTIVECTOR_DATASET=$MULTIVECTOR_DATASET DATASET=$DATASET DISTANCE=$DISTANCE REQUIRED_RECALL=$REQUIRED_RECALL QUANTIZATION=$QUANTIZATION WEAVIATE_VERSION=$WEAVIATE_VERSION MACHINE_TYPE=$MACHINE_TYPE CLOUD_PROVIDER=$CLOUD_PROVIDER OS=$OS bash ann_benchmark_quantization.sh"
mkdir -p results
gcloud compute scp --zone $ZONE --recurse "$instance:~/results/*.json" results/
13 changes: 13 additions & 0 deletions apps/ann-benchmarks/analyze.py
@@ -37,6 +37,19 @@ def test_recall_before_after(self):
f"allowed delta for recall before and after restart is {allowed_delta}, got before={mean_recall_before}, after={mean_recall_after}",
)

def test_qps_before_after(self):
allowed_delta = 0.25
mean_qps_before = self.df.loc[self.df["after_restart"] == "false", "qps"].mean()
mean_qps_after = self.df.loc[self.df["after_restart"] == "true", "qps"].mean()

min_val, max_val = min(mean_qps_before, mean_qps_after), max(
mean_qps_before, mean_qps_after
)
self.assertTrue(
min_val > max_val * (1 - allowed_delta),
f"qps before and after restart are not within the allowed delta of {allowed_delta}, got before={mean_qps_before}, after={mean_qps_after}",
)


if __name__ == "__main__":
unittest.main()
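The check above accepts the post-restart run as long as the slower of the two QPS measurements stays within 25% of the faster one. As a rough standalone illustration of that tolerance rule (the QPS numbers below are invented for the example, not benchmark output):

# Minimal sketch of the tolerance rule used in test_qps_before_after.
ALLOWED_DELTA = 0.25

def qps_within_delta(before: float, after: float, delta: float = ALLOWED_DELTA) -> bool:
    # True when the smaller QPS value exceeds the larger one scaled by (1 - delta).
    low, high = min(before, after), max(before, after)
    return low > high * (1 - delta)

print(qps_within_delta(1000.0, 800.0))  # True: 800 > 1000 * 0.75
print(qps_within_delta(1000.0, 700.0))  # False: 700 is not greater than 750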
7 changes: 4 additions & 3 deletions apps/ann-benchmarks/requirements.txt
@@ -1,5 +1,6 @@
weaviate-client==4.7.0-rc.0
weaviate-client>=4.11.0
loguru==0.5.3
seaborn==0.12.2
h5py==3.11.0
pandas==2.2.2
h5py==3.13.0
pandas==2.2.3
torch==2.6.0
22 changes: 13 additions & 9 deletions apps/ann-benchmarks/run.py
@@ -3,6 +3,7 @@
import sys
from loguru import logger
import h5py
import torch
import grpc
import pathlib
import time
@@ -30,6 +31,7 @@
"quantization": False,
"dim_to_segment_ratio": 4,
"override": False,
"multivector": False,
}

pathlib.Path("./results").mkdir(parents=True, exist_ok=True)
@@ -48,6 +50,7 @@
parser.add_argument("-q", "--query-only", action=argparse.BooleanOptionalAction)
parser.add_argument("-o", "--override", action=argparse.BooleanOptionalAction)
parser.add_argument("-s", "--dim-to-segment-ratio")
parser.add_argument("-mv", "--multivector", action=argparse.BooleanOptionalAction, default=False)
args = parser.parse_args()


@@ -80,9 +83,15 @@
values["dim_to_segment_ratio"] = int(args.dim_to_segment_ratio)
values["labels"]["dim_to_segment_ratio"] = values["dim_to_segment_ratio"]

values["multivector"] = args.multivector

f = h5py.File(args.vectors)
values["labels"]["dataset_file"] = os.path.basename(args.vectors)
vectors = f["train"]
if values["multivector"]:
vector_dim: int = 128
vectors = [torch.from_numpy(sample.reshape(-1, vector_dim)) for sample in vectors]


efC = values["efC"]
distance = args.distance
@@ -94,13 +103,14 @@
quantization = values["quantization"]
override = values["override"]
dim_to_seg_ratio = values["dim_to_segment_ratio"]
multivector = values["multivector"]
before_import = time.time()
logger.info(
f"Starting import with efC={efC}, m={m}, shards={shards}, distance={distance}"
)
if override == False:
reset_schema(client, efC, m, shards, distance)
load_records(client, vectors, quantization, dim_to_seg_ratio, override)
reset_schema(client, efC, m, shards, distance, multivector)
load_records(client, vectors, quantization, dim_to_seg_ratio, override, multivector)
elapsed = time.time() - before_import
logger.info(
f"Finished import with efC={efC}, m={m}, shards={shards} in {str(timedelta(seconds=elapsed))}"
@@ -109,11 +119,5 @@
time.sleep(30)

logger.info(f"Starting querying for efC={efC}, m={m}, shards={shards}")
query(
client,
stub,
f,
values["ef"],
values["labels"],
)
query(client, stub, f, values["ef"], values["labels"], values["multivector"])
logger.info(f"Finished querying for efC={efC}, m={m}, shards={shards}")