Skip to content

Commit

Permalink
Build: release shotit-worker 0.9.10 (#60)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Leslie-Wong-H authored Jul 14, 2023
2 parents ec95ebc + 48fc80d commit 09e2d79
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 27 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/shotit/shotit-worker/docker-image.yml?branch=main&style=flat-square)](https://github.com/shotit/shotit-worker/actions)
[![GitHub release](https://img.shields.io/github/release/shotit/shotit-worker.svg)](https://github.com/shotit/shotit-worker/releases/latest)
[![Watcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-watcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
[![Watcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-watcher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-watcher)
[![Hasher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-hasher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
[![Hasher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-hasher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-hasher)
[![Loader Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-loader?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
[![Loader Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-loader/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-loader)
[![Searcher Docker](https://img.shields.io/docker/pulls/lesliewong007/shotit-worker-searcher?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)
[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.9?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)
[![Searcher Docker Image Size](https://img.shields.io/docker/image-size/lesliewong007/shotit-worker-searcher/v0.9.10?style=flat-square)](https://hub.docker.com/r/lesliewong007/shotit-worker-searcher)

Backend workers for [shotit](https://github.com/shotit/shotit). Four core workers of shotit: watcher, hasher, loader and searcher.

Expand Down
27 changes: 14 additions & 13 deletions loader.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import WebSocket from "ws";
import xmldoc from "xmldoc";
import lzma from "lzma-native";
import fetch from "node-fetch";
import { MilvusClient } from "@zilliz/milvus2-sdk-node";
import { MilvusClient, DataType } from "@zilliz/milvus2-sdk-node";
import cron from "node-cron";
import lodash from "lodash";
const { chunk, flatten } = lodash;
Expand All @@ -22,7 +22,7 @@ const openHandle = async () => {
const initializeMilvusCollection = async () => {
const milvusClient = new MilvusClient({
address: MILVUS_URL,
timeout: 60 * 1000, // 60s
timeout: 5 * 60 * 1000, // 5 mins
});

const params = {
Expand All @@ -32,7 +32,7 @@ const initializeMilvusCollection = async () => {
{
name: "cl_ha",
description: "Dynamic fields for LIRE Solr",
data_type: 101, // DataType.FloatVector
data_type: DataType.FloatVector,
dim: 100,
},
// {
Expand All @@ -42,14 +42,14 @@ const initializeMilvusCollection = async () => {
// description: "Metric Spaces Indexing",
// },
{
name: "id",
data_type: 21, //DataType.VarChar
name: "hash_id",
data_type: DataType.VarChar,
max_length: 500,
description: "${imdbID}/${fileName}/${time}",
},
{
name: "primary_key",
data_type: 5, //DataType.Int64
data_type: DataType.Int64,
is_primary_key: true,
description: "Primary Key",
},
Expand Down Expand Up @@ -169,7 +169,7 @@ const messageHandle = async (data) => {

const milvusClient = new MilvusClient({
address: MILVUS_URL,
timeout: 60 * 1000, // 60s
timeout: 5 * 60 * 1000, // 5 mins
});
// The retry mechanism to prevent GRPC error
const fallBack = async () => {
Expand All @@ -180,7 +180,7 @@ const messageHandle = async (data) => {
// for (let i = 0; i < dedupedHashList.length; i++) {
// const doc = dedupedHashList[i];
// jsonData[i] = {
// id: `${file}/${doc.time.toFixed(2)}`,
// hash_id: `${file}/${doc.time.toFixed(2)}`,
// // cl_hi: doc.cl_hi, // reduce index size
// cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1),
// primary_key: getPrimaryKey(doc.cl_hi),
Expand All @@ -194,7 +194,7 @@ const messageHandle = async (data) => {
for (let i = 0; i < dedupedHashList.length; i++) {
const doc = dedupedHashList[i];
jsonData[i] = {
id: `${file}/${doc.time.toFixed(2)}`,
hash_id: `${file}/${doc.time.toFixed(2)}`,
// cl_hi: doc.cl_hi, // reduce index size
cl_ha: getNormalizedCharCodesVector(doc.cl_ha, 100, 1),
primary_key: getPrimaryKey(doc.cl_hi),
Expand All @@ -209,9 +209,10 @@ const messageHandle = async (data) => {
);
const jsonData = flatten(segments);

// Pause for 5 seconds to make node arrange the compute resource.
console.log("Pause for 5 seconds");
await new Promise((resolve) => setTimeout(resolve, 5000));
// Pause for 1 second to make node arrange the compute resource.
// Note: not 5 in case of gRPC Error: 13 INTERNAL: No message received
console.log("Pause for 1 second");
await new Promise((resolve) => setTimeout(resolve, 1000));

console.log(`Uploading JSON data to Milvus`);

Expand Down Expand Up @@ -341,7 +342,7 @@ const closeHandle = async () => {
startTime = performance.now();
const milvusClient = new MilvusClient({
address: MILVUS_URL,
timeout: 60 * 1000, // 60s
timeout: 5 * 60 * 1000, // 5 mins
});
console.log("Flush begins", startTime);
await milvusClient.flushSync({ collection_names: ["shotit"] });
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "shotit-worker",
"version": "0.9.9",
"version": "0.9.10",
"description": "Four core workers of shotit: watcher, hasher, loader and searcher",
"main": "",
"type": "module",
Expand Down
28 changes: 19 additions & 9 deletions searcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ const search = async (hash) => {
collection_name: "shotit",
});

const searchParams = {
anns_field: "cl_ha",
topk: "15",
metric_type: "IP",
params: JSON.stringify({ nprobe: 10 }),
};
// const searchParams = {
// anns_field: "cl_ha",
// topk: "15",
// metric_type: "IP",
// params: JSON.stringify({ nprobe: 10 }),
// };

const normalizedCharCodesVector = getNormalizedCharCodesVector(hash);

Expand All @@ -84,13 +84,23 @@ const search = async (hash) => {

console.log(normalizedCharCodesVector);

// const results = await milvusClient.search({
// collection_name: "shotit",
// expr: "",
// vectors: [normalizedCharCodesVector],
// search_params: searchParams,
// vector_type: 101, // DataType.FloatVector
// output_fields: ["hash_id", "primary_key"],
// });

const results = await milvusClient.search({
collection_name: "shotit",
expr: "",
vectors: [normalizedCharCodesVector],
search_params: searchParams,
vector_type: 101, // DataType.FloatVector
output_fields: ["id", "primary_key"],
topk: 15,
metric_type: "IP",
params: { nprobe: 10 },
output_fields: ["hash_id", "primary_key"],
});

return results;
Expand Down

0 comments on commit 09e2d79

Please sign in to comment.