Skip to content

Commit c859d5c

Browse files
author
Hilik Yochai
committed
move to be based on main
1 parent e07f7ad commit c859d5c

File tree

3 files changed

+650
-0
lines changed

3 files changed

+650
-0
lines changed
+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#pragma once
2+
3+
#include <sys/resource.h>

#include <algorithm>
#include <atomic>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <fstream>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <random>
#include <shared_mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
15+
16+
#include "visited_nodes_handler.h"
17+
#include "VecSim/spaces/spaces.h"
18+
#include "VecSim/memory/vecsim_malloc.h"
19+
#include "VecSim/utils/vecsim_stl.h"
20+
#include "VecSim/utils/vec_utils.h"
21+
#include "VecSim/utils/data_block.h"
22+
#include "VecSim/utils/vecsim_results_container.h"
23+
#include "VecSim/query_result_definitions.h"
24+
#include "VecSim/vec_sim_common.h"
25+
#include "VecSim/vec_sim_index.h"
26+
#include "VecSim/tombstone_interface.h"
27+
28+
#ifdef BUILD_TESTS
29+
#include "hnsw_serialization_utils.h"
30+
#include "VecSim/utils/serializer.h"
31+
#endif
32+
33+
using std::pair;
34+
using graphNodeType = pair<idType, ushort>; // represented as: (element_id, level)
35+
36+
37+
38+
class absEdges {
39+
public:
40+
absEdges();
41+
virtual ~absEdges();
42+
43+
virtual void push(idType id) = 0;
44+
45+
virtual bool removeIdIfExists(idType element_id) = 0;
46+
virtual void removeId(idType element_id) = 0;
47+
48+
virtual std::pair<size_t, const idType *> Get() = 0;
49+
virtual void Set(std::pair<size_t, const idType *> inp) = 0;
50+
51+
virtual void save(std::ofstream &output) ;
52+
virtual void restore(std::ifstream &input);
53+
};
54+
55+
56+
// vector metadata contains all the metadata of the vector;
57+
// this is replacing the id->metadata table and the element graph data
58+
//
59+
60+
struct VectorMetaData
61+
{
62+
enum Flags {
63+
DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph
64+
IN_PROCESS = 0x2, // element is being inserted into the graph
65+
PERMANENT_DELETED = 0x4, // element no longer in the graph
66+
};
67+
VectorMetaData(const labelType &label, uint8_t max_level) :
68+
label_(label), max_level_(max_level), flags_(0) {}
69+
70+
VectorMetaData(const VectorMetaData &src) :
71+
label_(src.label_), max_level_(src.max_level_)
72+
{flags_ = char(src.flags_);}
73+
74+
// mark methods
75+
void mark(Flags flag) {
76+
flags_ |= flag;
77+
}
78+
void unmark(Flags flag) {
79+
flags_ &= ~flag;
80+
}
81+
bool ismarked(Flags flag) const {
82+
return flags_ & flag;
83+
}
84+
85+
labelType label_;
86+
uint8_t max_level_;
87+
std::atomic<uint8_t> flags_ = 0;
88+
std::mutex NodeGuard;
89+
};
90+
91+
92+
class WriteBatch;
93+
class absGraphData {
94+
public:
95+
absGraphData() {}
96+
virtual ~absGraphData() {};
97+
98+
// vector methods
99+
virtual const char *
100+
getVectorByInternalId(idType internal_id) const = 0;
101+
102+
virtual void
103+
multiGetVectors(const std::vector<idType> &,
104+
std::vector<const char *> &results) const = 0;
105+
106+
virtual idType
107+
pushVector(const void *vector_data,
108+
int max_level,
109+
const labelType &label,
110+
WriteBatch *wb) = 0;
111+
112+
// premanently delete the vector and the edges "free" the id
113+
virtual void
114+
deleteVectorAndEdges(idType internalId,
115+
WriteBatch *wb) = 0;
116+
117+
118+
// vectorMetaData methods
119+
virtual const VectorMetaData &
120+
vectorMetaDataById(idType internal_id) const = 0;
121+
122+
123+
virtual VectorMetaData &
124+
vectorMetaDataById(idType internal_id,
125+
WriteBatch *wb);
126+
127+
128+
129+
130+
131+
// outgoing edges
132+
virtual const absEdges &
133+
GetLevelOutgoingEdges(const graphNodeType &) const = 0;
134+
135+
virtual absEdges &
136+
GetLevelOutgoingEdges(const graphNodeType &,
137+
WriteBatch *) = 0;
138+
139+
140+
// inomming edges
141+
// fetch incoming from the database
142+
virtual const absEdges &
143+
GetLevelIncomingEdges(const graphNodeType &) const = 0;
144+
virtual absEdges &
145+
GetLevelIncomingEdges(const graphNodeType &,
146+
WriteBatch *) = 0;
147+
148+
// support only simple updates (add / delete target) operations
149+
// may not fetch the data from the database
150+
virtual absEdges &
151+
GetLevelVirtualIncomingEdges(const graphNodeType &id,
152+
WriteBatch *) = 0;
153+
// helper methods
154+
155+
// scan the database for the first node after starting id that exist at level
156+
virtual idType
157+
getVectorIdByLevel(short level,
158+
idType startingId) const = 0;
159+
160+
// get a pair of candidates to swap for the gc
161+
// first is a location that is permanent deleted
162+
// second is a location that is valid
163+
// start points is the last pair returned in the prev scan
164+
virtual idType
165+
getGarbadgeCollectionTarget(idType startPoint) const = 0;
166+
167+
// new and commit wrire batch
168+
virtual WriteBatch *newWriteBatch() = 0;
169+
virtual void CommitWriteBatch(WriteBatch *wb) = 0;
170+
171+
172+
virtual void shrinkToFit() = 0;
173+
174+
public:
175+
virtual void save(std::ofstream &output) const = 0;
176+
virtual void restore(std::ifstream &input) = 0;
177+
178+
static absGraphData *
179+
NewRamGraphData(std::shared_ptr<VecSimAllocator> allocator,
180+
size_t block_size,
181+
size_t max_num_outgoing_links,
182+
size_t vector_size_bytes,
183+
size_t initial_capacity,
184+
size_t vector_alignment);
185+
186+
static absGraphData *
187+
NewRamWBGraphData(std::shared_ptr<VecSimAllocator> allocator,
188+
size_t block_size,
189+
size_t max_num_outgoing_links,
190+
size_t vector_size_bytes,
191+
size_t initial_capacity,
192+
size_t vector_alignment);
193+
194+
static absGraphData *
195+
NewDBGraphData(std::shared_ptr<VecSimAllocator> allocator,
196+
std::string db_path);
197+
198+
199+
protected:
200+
201+
};
202+

0 commit comments

Comments
 (0)