Skip to content

Commit 9825ae2

Browse files
authored
Merge pull request #344 from erizocosmico/feature/smaller-keys
gitbase: custom encode and decode of index keys to save space
2 parents e8d7b16 + 7839638 commit 9825ae2

25 files changed

+1152
-150
lines changed

blobs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ func (i *blobsKeyValueIter) Next() ([]interface{}, []byte, error) {
393393
hash = blob.Hash.String()
394394
}
395395

396-
key, err := encodeIndexKey(packOffsetIndexKey{
396+
key, err := encodeIndexKey(&packOffsetIndexKey{
397397
Repository: i.repo.ID,
398398
Packfile: packfile.String(),
399399
Offset: offset,

blobs_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
147147

148148
var expected = []keyValue{
149149
{
150-
assertEncodeKey(t, packOffsetIndexKey{
150+
assertEncodeKey(t, &packOffsetIndexKey{
151151
Repository: path,
152152
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
153153
Offset: 1591,
@@ -158,7 +158,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
158158
},
159159
},
160160
{
161-
assertEncodeKey(t, packOffsetIndexKey{
161+
assertEncodeKey(t, &packOffsetIndexKey{
162162
Repository: path,
163163
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
164164
Offset: 79864,
@@ -169,7 +169,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
169169
},
170170
},
171171
{
172-
assertEncodeKey(t, packOffsetIndexKey{
172+
assertEncodeKey(t, &packOffsetIndexKey{
173173
Repository: path,
174174
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
175175
Offset: 2418,
@@ -180,7 +180,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
180180
},
181181
},
182182
{
183-
assertEncodeKey(t, packOffsetIndexKey{
183+
assertEncodeKey(t, &packOffsetIndexKey{
184184
Repository: path,
185185
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
186186
Offset: 78932,
@@ -191,7 +191,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
191191
},
192192
},
193193
{
194-
assertEncodeKey(t, packOffsetIndexKey{
194+
assertEncodeKey(t, &packOffsetIndexKey{
195195
Repository: path,
196196
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
197197
Offset: 82000,
@@ -202,7 +202,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
202202
},
203203
},
204204
{
205-
assertEncodeKey(t, packOffsetIndexKey{
205+
assertEncodeKey(t, &packOffsetIndexKey{
206206
Repository: path,
207207
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
208208
Offset: 85438,
@@ -213,7 +213,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
213213
},
214214
},
215215
{
216-
assertEncodeKey(t, packOffsetIndexKey{
216+
assertEncodeKey(t, &packOffsetIndexKey{
217217
Repository: path,
218218
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
219219
Offset: 1780,
@@ -224,7 +224,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
224224
},
225225
},
226226
{
227-
assertEncodeKey(t, packOffsetIndexKey{
227+
assertEncodeKey(t, &packOffsetIndexKey{
228228
Repository: path,
229229
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
230230
Offset: 81707,
@@ -235,7 +235,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
235235
},
236236
},
237237
{
238-
assertEncodeKey(t, packOffsetIndexKey{
238+
assertEncodeKey(t, &packOffsetIndexKey{
239239
Repository: path,
240240
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
241241
Offset: 1752,
@@ -246,7 +246,7 @@ func TestBlobsIndexKeyValueIter(t *testing.T) {
246246
},
247247
},
248248
{
249-
assertEncodeKey(t, packOffsetIndexKey{
249+
assertEncodeKey(t, &packOffsetIndexKey{
250250
Repository: path,
251251
Packfile: "323a4b6b5de684f9966953a043bc800154e5dbfa",
252252
Offset: 2436,

commit_blobs.go

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gitbase
22

33
import (
4+
"bytes"
45
"io"
56

67
"gopkg.in/src-d/go-git.v4/plumbing/object"
@@ -101,7 +102,12 @@ func (*commitBlobsTable) IndexKeyValueIter(
101102
return nil, err
102103
}
103104

104-
return &rowKeyValueIter{iter, colNames, CommitBlobsSchema}, nil
105+
return &rowKeyValueIter{
106+
new(commitBlobsRowKeyMapper),
107+
iter,
108+
colNames,
109+
CommitBlobsSchema,
110+
}, nil
105111
}
106112

107113
// WithProjectFiltersAndIndex implements sql.Indexable interface.
@@ -117,7 +123,7 @@ func (*commitBlobsTable) WithProjectFiltersAndIndex(
117123
return nil, ErrInvalidGitbaseSession.New(ctx.Session)
118124
}
119125

120-
var iter sql.RowIter = &rowIndexIter{index}
126+
var iter sql.RowIter = &rowIndexIter{new(commitBlobsRowKeyMapper), index}
121127

122128
if len(filters) > 0 {
123129
iter = plan.NewFilterIter(ctx, expression.JoinAnd(filters...), iter)
@@ -126,6 +132,63 @@ func (*commitBlobsTable) WithProjectFiltersAndIndex(
126132
return sql.NewSpanIter(span, iter), nil
127133
}
128134

135+
// commitBlobsRowKeyMapper is a stateless (empty struct) converter between
// three-column rows and compact index key bytes; see its fromRow and toRow
// methods.
type commitBlobsRowKeyMapper struct{}
136+
137+
// fromRow encodes a row into index key bytes.
//
// The row must have exactly 3 columns, all of dynamic type string: row[0] is
// written with writeString, row[1] and row[2] are written with writeHash, in
// that order.
//
// It returns errRowKeyMapperRowLength when the row does not have 3 columns,
// errRowKeyMapperColType when a column is not a string, and any error
// reported by writeHash.
func (commitBlobsRowKeyMapper) fromRow(row sql.Row) ([]byte, error) {
138+
if len(row) != 3 {
139+
return nil, errRowKeyMapperRowLength.New(3, len(row))
140+
}
141+
142+
repo, ok := row[0].(string)
143+
if !ok {
144+
// repo holds the zero string when the assertion fails.
// NOTE(review): presumably passed so the error can report the expected
// type next to the actual value — confirm against the error's format.
return nil, errRowKeyMapperColType.New(0, repo, row[0])
145+
}
146+
147+
commit, ok := row[1].(string)
148+
if !ok {
149+
return nil, errRowKeyMapperColType.New(1, commit, row[1])
150+
}
151+
152+
blob, ok := row[2].(string)
153+
if !ok {
154+
return nil, errRowKeyMapperColType.New(2, blob, row[2])
155+
}
156+
157+
// Key layout: repo string, then commit hash, then blob hash. toRow reads
// the fields back in this same order.
var buf bytes.Buffer
158+
writeString(&buf, repo)
159+
160+
if err := writeHash(&buf, commit); err != nil {
161+
return nil, err
162+
}
163+
164+
if err := writeHash(&buf, blob); err != nil {
165+
return nil, err
166+
}
167+
168+
return buf.Bytes(), nil
169+
}
170+
171+
// toRow decodes index key bytes back into a three-column row.
//
// It reads one value with readString followed by two values with readHash,
// mirroring the order fromRow writes them, and returns them as
// sql.Row{repo, commit, blob}. The first read error aborts decoding and is
// returned with a nil row.
func (commitBlobsRowKeyMapper) toRow(data []byte) (sql.Row, error) {
172+
var buf = bytes.NewBuffer(data)
173+
174+
repo, err := readString(buf)
175+
if err != nil {
176+
return nil, err
177+
}
178+
179+
commit, err := readHash(buf)
180+
if err != nil {
181+
return nil, err
182+
}
183+
184+
blob, err := readHash(buf)
185+
if err != nil {
186+
return nil, err
187+
}
188+
189+
// Any bytes left in buf after the third field are not inspected.
return sql.Row{repo, commit, blob}, nil
190+
}
191+
129192
func commitBlobsIterBuilder(ctx *sql.Context, selectors selectors, columns []sql.Expression) (RowRepoIter, error) {
130193
repos, err := selectors.textValues("repository_id")
131194
if err != nil {

commit_blobs_test.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66

77
"github.com/stretchr/testify/require"
8+
"gopkg.in/src-d/go-git.v4/plumbing"
89
"gopkg.in/src-d/go-mysql-server.v0/sql"
910
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
1011
)
@@ -180,14 +181,21 @@ func TestCommitBlobsIndexKeyValueIter(t *testing.T) {
180181
var expected []keyValue
181182
for _, row := range rows {
182183
var kv keyValue
183-
kv.key = assertEncodeKey(t, row)
184+
kv.key = assertEncodeCommitBlobsRow(t, row)
184185
kv.values = append(kv.values, row[2], row[1])
185186
expected = append(expected, kv)
186187
}
187188

188189
assertIndexKeyValueIter(t, iter, expected)
189190
}
190191

192+
// assertEncodeCommitBlobsRow is a test helper that encodes row with
// commitBlobsRowKeyMapper.fromRow and returns the resulting key bytes,
// failing the test via require.NoError if encoding returns an error.
func assertEncodeCommitBlobsRow(t *testing.T, row sql.Row) []byte {
193+
// Mark as helper so failures are reported at the caller's line.
t.Helper()
194+
k, err := new(commitBlobsRowKeyMapper).fromRow(row)
195+
require.NoError(t, err)
196+
return k
197+
}
198+
191199
func TestCommitBlobsIndex(t *testing.T) {
192200
testTableIndex(
193201
t,
@@ -198,3 +206,17 @@ func TestCommitBlobsIndex(t *testing.T) {
198206
)},
199207
)
200208
}
209+
210+
// TestCommitBlobsRowKeyMapper checks that a row survives a round trip:
// encoding it with fromRow and decoding the result with toRow must yield a
// row equal to the original.
func TestCommitBlobsRowKeyMapper(t *testing.T) {
211+
// Shadows the require package with an assertion object bound to t.
require := require.New(t)
212+
row := sql.Row{"repo1", plumbing.ZeroHash.String(), plumbing.ZeroHash.String()}
213+
mapper := new(commitBlobsRowKeyMapper)
214+
215+
k, err := mapper.fromRow(row)
216+
require.NoError(err)
217+
218+
row2, err := mapper.toRow(k)
219+
require.NoError(err)
220+
221+
require.Equal(row, row2)
222+
}

commit_files.go

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package gitbase
22

33
import (
4+
"bytes"
45
"io"
56

67
"github.com/sirupsen/logrus"
@@ -291,6 +292,71 @@ type commitFileIndexKey struct {
291292
Commit string
292293
}
293294

295+
// encode serializes the key into bytes.
//
// Fields are written in a fixed order: Repository (writeString), Packfile
// and Hash (writeHash), Offset (writeInt64), Name (writeString), Mode
// (writeInt64), then Tree and Commit (writeHash). decode reads them back in
// the same order. The first error reported by writeHash aborts encoding and
// is returned with nil bytes.
func (k *commitFileIndexKey) encode() ([]byte, error) {
296+
var buf bytes.Buffer
297+
writeString(&buf, k.Repository)
298+
if err := writeHash(&buf, k.Packfile); err != nil {
299+
return nil, err
300+
}
301+
302+
if err := writeHash(&buf, k.Hash); err != nil {
303+
return nil, err
304+
}
305+
306+
writeInt64(&buf, k.Offset)
307+
writeString(&buf, k.Name)
308+
writeInt64(&buf, k.Mode)
309+
310+
if err := writeHash(&buf, k.Tree); err != nil {
311+
return nil, err
312+
}
313+
314+
if err := writeHash(&buf, k.Commit); err != nil {
315+
return nil, err
316+
}
317+
318+
return buf.Bytes(), nil
319+
}
320+
321+
// decode populates k from bytes in the layout written by encode.
//
// Fields are read in order: Repository, Packfile, Hash, Offset, Name, Mode,
// Tree, Commit. Each field is assigned directly on k as it is read, so when
// a read fails the error is returned and k is left partially populated with
// the fields decoded up to that point.
func (k *commitFileIndexKey) decode(data []byte) error {
322+
var buf = bytes.NewBuffer(data)
323+
var err error
324+
325+
if k.Repository, err = readString(buf); err != nil {
326+
return err
327+
}
328+
329+
if k.Packfile, err = readHash(buf); err != nil {
330+
return err
331+
}
332+
333+
if k.Hash, err = readHash(buf); err != nil {
334+
return err
335+
}
336+
337+
if k.Offset, err = readInt64(buf); err != nil {
338+
return err
339+
}
340+
341+
if k.Name, err = readString(buf); err != nil {
342+
return err
343+
}
344+
345+
if k.Mode, err = readInt64(buf); err != nil {
346+
return err
347+
}
348+
349+
if k.Tree, err = readHash(buf); err != nil {
350+
return err
351+
}
352+
353+
if k.Commit, err = readHash(buf); err != nil {
354+
return err
355+
}
356+
357+
// Any bytes remaining in buf after the last field are not inspected.
return nil
358+
}
359+
294360
type commitFilesKeyValueIter struct {
295361
pool *RepositoryPool
296362
repo *Repository
@@ -369,7 +435,7 @@ func (i *commitFilesKeyValueIter) Next() ([]interface{}, []byte, error) {
369435
return nil, nil, err
370436
}
371437

372-
key, err := encodeIndexKey(commitFileIndexKey{
438+
key, err := encodeIndexKey(&commitFileIndexKey{
373439
Repository: i.repo.ID,
374440
Packfile: packfile.String(),
375441
Hash: f.Blob.Hash.String(),

commit_files_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66

77
"github.com/stretchr/testify/require"
8+
"gopkg.in/src-d/go-git.v4/plumbing"
89
"gopkg.in/src-d/go-mysql-server.v0/sql"
910
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
1011
)
@@ -78,3 +79,26 @@ func TestCommitFilesIndex(t *testing.T) {
7879
)},
7980
)
8081
}
82+
83+
// TestEncodeCommitFileIndexKey checks that a commitFileIndexKey survives a
// round trip: encoding a fully populated key and decoding the bytes into a
// fresh key must produce an equal value.
func TestEncodeCommitFileIndexKey(t *testing.T) {
84+
// Shadows the require package with an assertion object bound to t.
require := require.New(t)
85+
86+
k := commitFileIndexKey{
87+
Repository: "repo1",
88+
Packfile: plumbing.ZeroHash.String(),
89+
Offset: 1234,
90+
Hash: plumbing.ZeroHash.String(),
91+
Name: "foo/bar.md",
92+
Mode: 5,
93+
Tree: plumbing.ZeroHash.String(),
94+
Commit: plumbing.ZeroHash.String(),
95+
}
96+
97+
data, err := k.encode()
98+
require.NoError(err)
99+
100+
// Decode into a zero-valued key so every field must come from data.
var k2 commitFileIndexKey
101+
require.NoError(k2.decode(data))
102+
103+
require.Equal(k, k2)
104+
}

0 commit comments

Comments
 (0)