From b6fcde6850d722b15ce7d202ded403c47c627927 Mon Sep 17 00:00:00 2001 From: Leslie-Wong-H <79917148leslie@gmail.com> Date: Sat, 15 Jul 2023 00:53:23 +0800 Subject: [PATCH 1/5] extend MilvusClient connection timeout to 5 mins --- loader.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loader.js b/loader.js index d887e36..7c6a871 100644 --- a/loader.js +++ b/loader.js @@ -22,7 +22,7 @@ const openHandle = async () => { const initializeMilvusCollection = async () => { const milvusClient = new MilvusClient({ address: MILVUS_URL, - timeout: 60 * 1000, // 60s + timeout: 5 * 60 * 1000, // 5 mins }); const params = { @@ -169,7 +169,7 @@ const messageHandle = async (data) => { const milvusClient = new MilvusClient({ address: MILVUS_URL, - timeout: 60 * 1000, // 60s + timeout: 5 * 60 * 1000, // 5 mins }); // The retry mechanism to prevent GRPC error const fallBack = async () => { @@ -341,7 +341,7 @@ const closeHandle = async () => { startTime = performance.now(); const milvusClient = new MilvusClient({ address: MILVUS_URL, - timeout: 60 * 1000, // 60s + timeout: 5 * 60 * 1000, // 5 mins }); console.log("Flush begins", startTime); await milvusClient.flushSync({ collection_names: ["shotit"] }); From 61a5abd73c764e1d07935525a96276c158a76b01 Mon Sep 17 00:00:00 2001 From: Leslie-Wong-H <79917148leslie@gmail.com> Date: Sat, 15 Jul 2023 01:17:07 +0800 Subject: [PATCH 2/5] Adopt the new milvusClient.search practice --- searcher.js | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/searcher.js b/searcher.js index 4b61e8e..0bd1b28 100644 --- a/searcher.js +++ b/searcher.js @@ -67,12 +67,12 @@ const search = async (hash) => { collection_name: "shotit", }); - const searchParams = { - anns_field: "cl_ha", - topk: "15", - metric_type: "IP", - params: JSON.stringify({ nprobe: 10 }), - }; + // const searchParams = { + // anns_field: "cl_ha", + // topk: "15", + // metric_type: "IP", + // params: JSON.stringify({ nprobe: 10 }), + // }; const normalizedCharCodesVector = getNormalizedCharCodesVector(hash); @@ -84,12 +84,22 @@ const search = async (hash) => { console.log(normalizedCharCodesVector); + // const results = await milvusClient.search({ + // collection_name: "shotit", + // expr: "", + // vectors: [normalizedCharCodesVector], + // search_params: searchParams, + // vector_type: 101, // DataType.FloatVector + // output_fields: ["id", "primary_key"], + // }); + const results = await milvusClient.search({ collection_name: "shotit", expr: "", vectors: [normalizedCharCodesVector], - search_params: searchParams, - vector_type: 101, // DataType.FloatVector + topk: 15, + metric_type: "IP", + params: { nprobe: 10 }, output_fields: ["id", "primary_key"], }); From f56afad5d2306f6d946dbcbd0af81c19b110622c Mon Sep 17 00:00:00 2001 From: Leslie-Wong-H <79917148leslie@gmail.com> Date: Sat, 15 Jul 2023 01:19:49 +0800 Subject: [PATCH 3/5] Refactor: change `id` to `hash_id` --- loader.js | 14 +++++++------- searcher.js | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/loader.js b/loader.js index 7c6a871..68b6fa5 100644 --- a/loader.js +++ b/loader.js @@ -3,7 +3,7 @@ import WebSocket from "ws"; import xmldoc from "xmldoc"; import lzma from "lzma-native"; import fetch from "node-fetch"; -import { MilvusClient } from "@zilliz/milvus2-sdk-node"; +import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node"; import cron from "node-cron"; import lodash from "lodash"; const { chunk, flatten } = lodash; @@ -32,7 +32,7 @@ const initializeMilvusCollection = async () => { { name: "cl_ha", description: "Dynamic fields for LIRE Solr", - data_type: 101, // DataType.FloatVector + data_type: DataType.FloatVector, dim: 100, }, // { @@ -42,14 +42,14 @@ const initializeMilvusCollection = async () => { // description: "Metric Spaces Indexing", // }, { - name: "id", - data_type: 21, //DataType.VarChar + name: "hash_id", + data_type: DataType.VarChar, max_length: 500, description: "${imdbID}/${fileName}/${time}", }, { name: "primary_key", - data_type: 5, //DataType.Int64 + data_type: DataType.Int64, is_primary_key: true, description: "Primary Key", }, @@ -180,7 +180,7 @@ const messageHandle = async (data) => { // for (let i = 0; i < dedupedHashList.length; i++) { // const doc = dedupedHashList[i]; // jsonData[i] = { - // id: `${file}/${doc.time.toFixed(2)}`, + // hash_id: `${file}/${doc.time.toFixed(2)}`, // // cl_hi: doc.cl_hi, // reduce index size // cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1), // primary_key: getPrimaryKey(doc.cl_hi), @@ -194,7 +194,7 @@ const messageHandle = async (data) => { for (let i = 0; i < dedupedHashList.length; i++) { const doc = dedupedHashList[i]; jsonData[i] = { - id: `${file}/${doc.time.toFixed(2)}`, + hash_id: `${file}/${doc.time.toFixed(2)}`, // cl_hi: doc.cl_hi, // reduce index size cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1), primary_key: getPrimaryKey(doc.cl_hi), diff --git a/searcher.js b/searcher.js index 0bd1b28..f33829f 100644 --- a/searcher.js +++ b/searcher.js @@ -90,7 +90,7 @@ const search = async (hash) => { // vectors: [normalizedCharCodesVector], // search_params: searchParams, // vector_type: 101, // DataType.FloatVector - // output_fields: ["id", "primary_key"], + // output_fields: ["hash_id", "primary_key"], // }); const results = await milvusClient.search({ @@ -100,7 +100,7 @@ const search = async (hash) => { topk: 15, metric_type: "IP", params: { nprobe: 10 }, - output_fields: ["id", "primary_key"], + output_fields: ["hash_id", "primary_key"], }); return results; From fb3c2d6b880e3df3d384663d0e464998826955fc Mon Sep 17 00:00:00 2001 From: Leslie-Wong-H <79917148leslie@gmail.com> Date: Sat, 15 Jul 2023 05:08:25 +0800 Subject: [PATCH 4/5] Change loader pause to 1 not 5 to fix gRPC error --- loader.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/loader.js b/loader.js index 68b6fa5..0b06336 100644 --- a/loader.js +++ b/loader.js @@ -209,9 +209,10 @@ const messageHandle = async (data) => { ); const jsonData = flatten(segments); - // Pause for 5 seconds to make node arrange the compute resource. - console.log("Pause for 5 seconds"); - await new Promise((resolve) => setTimeout(resolve, 5000)); + // Pause for 1 second to make node arrange the compute resource. + // Note: not 5 in case of gRPC Error: 13 INTERNAL: No message received + console.log("Pause for 1 second"); + await new Promise((resolve) => setTimeout(resolve, 1000)); console.log(`Uploading JSON data to Milvus`); From 48fc80d8b95a874416a15519966168db985b5d75 Mon Sep 17 00:00:00 2001 From: Leslie-Wong-H <79917148leslie@gmail.com> Date: Sat, 15 Jul 2023 05:14:07 +0800 Subject: [PATCH 5/5] Build: release shotit-worker 0.9.10 --- README.md | 8 ++++---- package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cd8553a..3f18a68 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/shotit/shotit-worker/docker-image.yml?branch=main&style=flat-square)](https://github.com/shotit/shotit-worker/actions) [![GitHub release](https://img.shields.io/github/release/shotit/shotit-worker.svg)](https://github.com/shotit/shotit-worker/releases/latest) [![Watcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-watcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher) -[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher) +[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher) [![Hasher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-hasher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher) -[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher) +[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher) [![Loader Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-loader?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader) -[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader) +[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader) [![Searcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-searcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher) -[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher) +[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher) Backend workers for [shotit](https://github.com/shotit/shotit). Four core workers of shotit: watcher, hasher, loader and searcher. diff --git a/package.json b/package.json index 5c3a5db..e379162 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "shotit-worker", - "version": "0.9.9", + "version": "0.9.10", "description": "Four core workers of shotit: watcher, hasher, loader and searcher", "main": "", "type": "module",