Skip to content

Commit c901529

Browse files
committed
libzstd 1.4.5 with dict training fix
ZDICT_optimizeTrainFromBuffer_[fast]Cover fails to select dictionary on some inputs: facebook/zstd#2371
1 parent ce98e63 commit c901529

File tree

4 files changed: 39 additions (+39) and 31 deletions (-31)

4 files changed

+39
-31
lines changed

ZstdNet.Tests/Binding_Tests.cs

+10 -2
Original file line number | Diff line number | Diff line change
@@ -473,10 +473,18 @@ public void CompressAndDecomress_throwsDstSizeTooSmall_Over2GB([Values(false, tr
473473
[Test, Explicit("stress")]
474474
public void TrainDictionaryParallel()
475475
{
476-
var dict = BuildDictionary();
476+
var buffer = Enumerable.Range(0, 100000).Select(i => unchecked((byte)(i * i))).ToArray();
477+
var samples = Enumerable.Range(0, 100)
478+
.Select(i => buffer.Skip(i).Take(200 - i).ToArray())
479+
.ToArray();
480+
481+
var dict = DictBuilder.TrainFromBuffer(samples);
482+
Assert.Greater(dict.Length, 0);
483+
Assert.LessOrEqual(dict.Length, DictBuilder.DefaultDictCapacity);
484+
477485
Enumerable.Range(0, 100000)
478486
.AsParallel().WithDegreeOfParallelism(Environment.ProcessorCount * 4)
479-
.ForAll(_ => Assert.IsTrue(dict.SequenceEqual(BuildDictionary())));
487+
.ForAll(_ => Assert.IsTrue(dict.SequenceEqual(DictBuilder.TrainFromBuffer(samples))));
480488
}
481489

482490
private static byte[] BuildDictionary()

ZstdNet/DictBuilder.cs

+29 -29
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,33 @@
1-
using System;
2-
using System.Collections.Generic;
1+
using System;
2+
using System.Collections.Generic;
33
using System.IO;
4-
using System.Linq;
5-
using size_t = System.UIntPtr;
6-
7-
namespace ZstdNet
8-
{
9-
public static class DictBuilder
10-
{
11-
public static byte[] TrainFromBuffer(IEnumerable<byte[]> samples, int dictCapacity = DefaultDictCapacity)
12-
{
13-
var ms = new MemoryStream();
14-
var samplesSizes = samples.Select(sample =>
15-
{
16-
ms.Write(sample, 0, sample.Length);
17-
return (size_t)sample.Length;
18-
}).ToArray();
19-
20-
var dictBuffer = new byte[dictCapacity];
21-
var dictSize = (int)ExternMethods
22-
.ZDICT_trainFromBuffer(dictBuffer, (size_t)dictCapacity, ms.ToArray(), samplesSizes, (uint)samplesSizes.Length)
4+
using System.Linq;
5+
using size_t = System.UIntPtr;
6+
7+
namespace ZstdNet
8+
{
9+
public static class DictBuilder
10+
{
11+
public static byte[] TrainFromBuffer(IEnumerable<byte[]> samples, int dictCapacity = DefaultDictCapacity)
12+
{
13+
var ms = new MemoryStream();
14+
var samplesSizes = samples.Select(sample =>
15+
{
16+
ms.Write(sample, 0, sample.Length);
17+
return (size_t)sample.Length;
18+
}).ToArray();
19+
20+
var dictBuffer = new byte[dictCapacity];
21+
var dictSize = (int)ExternMethods
22+
.ZDICT_trainFromBuffer(dictBuffer, (size_t)dictCapacity, ms.GetBuffer(), samplesSizes, (uint)samplesSizes.Length)
2323
.EnsureZdictSuccess();
2424

25-
if (dictCapacity != dictSize)
25+
if(dictCapacity != dictSize)
2626
Array.Resize(ref dictBuffer, dictSize);
27-
28-
return dictBuffer;
29-
}
30-
31-
public const int DefaultDictCapacity = 112640; // Used by zstd utility by default
32-
}
33-
}
27+
28+
return dictBuffer;
29+
}
30+
31+
public const int DefaultDictCapacity = 112640; // Used by zstd utility by default
32+
}
33+
}

ZstdNet/build/x64/libzstd.dll

0 Bytes
Binary file not shown.

ZstdNet/build/x86/libzstd.dll

0 Bytes
Binary file not shown.

0 commit comments

Comments (0)