// chunk_index.hh -- forked from zbackup/zbackup
// Copyright (c) 2012-2014 Konstantin Isakov <[email protected]> and ZBackup contributors, see CONTRIBUTORS
// Part of ZBackup. Licensed under GNU GPLv2 or later + OpenSSL, see LICENSE
#ifndef CHUNK_INDEX_HH_INCLUDED
#define CHUNK_INDEX_HH_INCLUDED
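// <ext/hash_map> is obsolete, but its replacement <unordered_map> requires
// C++11, so we stick with the former. Undefining __DEPRECATED below suppresses
// the deprecation warning that libstdc++ issues for such backward-compatibility
// headers.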
#undef __DEPRECATED
#include <stdint.h>
#include <exception>
#include <ext/hash_map>
#include <functional>
#include <string>
#include <vector>
#include "appendallocator.hh"
#include "bundle.hh"
#include "chunk_id.hh"
#include "dir.hh"
#include "encryption_key.hh"
#include "endian.hh"
#include "ex.hh"
#include "index_file.hh"
#include "nocopy.hh"
#include "rolling_hash.hh"
#include "tmp_mgr.hh"
using std::vector;
/// __gnu_cxx::hash comes without a specialization for unsigned long long.
/// Since uint64_t is a typedef for unsigned long long on every 32-bit
/// architecture and on some 64-bit ones, the specialization is supplied here.
/// Our keys are expected to have a more or less uniform bit distribution, so
/// plainly narrowing the value to size_t (which keeps only the lower 32 bits
/// on 32-bit systems) is considered good enough
namespace __gnu_cxx
{
template<>
struct hash< unsigned long long >
{
  /// Returns the key itself, converted to size_t
  size_t operator()( unsigned long long value ) const
  {
    return static_cast< size_t >( value );
  }
};
}
/// Callback interface for consumers of index data. Every callback is pure
/// virtual, so a concrete processor has to implement all of them.
/// NOTE(review): judging by the names, the callbacks bracket one index file
/// (startIndex/finishIndex), each bundle inside it (startBundle/finishBundle)
/// and each chunk inside a bundle (processChunk) -- confirm against the code
/// that drives the processor
class IndexProcessor
{
public:
  /// Virtual destructor, so that an implementation can be safely destroyed
  /// through a pointer to this interface. Spelled with an empty body rather
  /// than "= default" because this header avoids C++11
  virtual ~IndexProcessor() {}

  virtual void startIndex( string const & ) = 0;
  virtual void startBundle( Bundle::Id const & ) = 0;
  virtual void processChunk( ChunkId const &, uint32_t ) = 0;
  virtual void finishBundle( Bundle::Id const &, BundleInfo const & ) = 0;
  virtual void finishIndex( string const & ) = 0;
};
/// Keeps an in-memory hash table of chunks, which makes it possible to check
/// whether a specific chunk is already known and, if it is, to obtain the id of
/// the bundle it is stored in
class ChunkIndex: private NoCopy, private IndexProcessor
{
  /// A single chunk record; the hash table below maps a rolling hash digest to
  /// a pointer to such a record.
  /// NOTE(review): 'next' presumably links records sharing the same rolling
  /// hash digest -- confirm against chunk_index.cc
  struct Chain
  {
    ChunkId::CryptoHashPart cryptoHash;
    uint32_t size;
    Chain * next;
    Bundle::Id const * bundleId;

    Chain( ChunkId const &, uint32_t, Bundle::Id const * bundleId );

    bool equalsTo( ChunkId const & id );
  };

  /// The hash map holding all known chunk ids
  /// TODO: implement a custom hash table for better performance
  typedef __gnu_cxx::hash_map< RollingHash::Digest, Chain * > HashTable;

  // NOTE: the declaration order of the data members below is kept as is on
  // purpose, since it determines their initialization order
  EncryptionKey const & key;
  TmpMgr & tmpMgr;
  string indexPath;
  AppendAllocator storage;
  HashTable hashTable;

  /// The most recently used bundle id, kept so that it can be re-used
  Bundle::Id const * lastBundleId;

public:

  DEF_EX( Ex, "Chunk index exception", std::exception )
  DEF_EX( exIncorrectChunkIdSize, "Incorrect chunk id size encountered", Ex )

  /// NOTE(review): the meaning of the trailing bool flag is not visible in
  /// this header -- see the constructor definition
  ChunkIndex( EncryptionKey const &, TmpMgr &, string const & indexPath, bool );

  /// Gives findChunk() on-demand access to the full id of a chunk
  struct ChunkInfoInterface
  {
    /// Returns the full id of the chunk. It gets called only when the full id
    /// is really needed, because producing it involves the expensive
    /// calculation of the full hash
    virtual ChunkId const & getChunkId() = 0;

    virtual ~ChunkInfoInterface() {}
  };

  /// Returns the bundle id of the given chunk if the chunk exists, NULL
  /// otherwise
  Bundle::Id const * findChunk( ChunkId::RollingHashPart rollingHash,
                                ChunkInfoInterface & chunkInfo,
                                uint32_t *size = NULL );

  /// Returns the bundle id of the given chunk if the chunk exists, NULL
  /// otherwise
  Bundle::Id const * findChunk( ChunkId const & id, uint32_t *size = NULL );

  /// Puts a new chunk into the index unless it is there already. Returns true
  /// if the chunk was added, false if it existed before
  bool addChunk( ChunkId const & id, uint32_t size,
                 Bundle::Id const & bundleId );

  // Implementation of the IndexProcessor interface
  virtual void startIndex( string const & );
  virtual void startBundle( Bundle::Id const & );
  virtual void processChunk( ChunkId const &, uint32_t );
  virtual void finishBundle( Bundle::Id const &, BundleInfo const & );
  virtual void finishIndex( string const & );

  /// NOTE(review): presumably reports the stored index contents to the given
  /// processor -- confirm against chunk_index.cc
  void loadIndex( IndexProcessor & );

  /// NOTE(review): presumably the number of entries held by the index --
  /// confirm against chunk_index.cc
  size_t size();

private:

  /// Puts a new chunk id into the in-memory hash table. Returns the newly
  /// created Chain if the id was inserted, NULL if it had been there before
  Chain * registerNewChunkId( ChunkId const & id, uint32_t,
                              Bundle::Id const * );
};
#endif