Skip to content

Commit

Permalink
Add protobuf dependency and generate types
Browse files Browse the repository at this point in the history
* Add https://github.com/protobufjs/protobuf.js in order to serialize
  and deserialize DDSketch objects as protobufs, so that they can
  efficiently be transferred over a network
* Check in `proto/DDSketch.proto` definition, and add a new build command
  `yarn generate:proto` that reads this definition, and generates
  the JavaScript and TypeScript blobs (`proto/compiled.{js|d.ts}`)
  necessary to interact with protobufs
  • Loading branch information
brimtown committed Dec 2, 2020
1 parent 08589c8 commit a449654
Show file tree
Hide file tree
Showing 6 changed files with 1,329 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
node_modules
dist
src/ddsketch/proto/compiled.js
src/ddsketch/proto/compiled.d.ts
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
"build": "tsc -p tsconfig.build.json",
"test": "jest",
"lint": "eslint src/**/*",
"typecheck": "tsc --noEmit"
"typecheck": "tsc --noEmit",
"generate:proto": "pbjs -t static-module -w commonjs -o src/ddsketch/proto/compiled.js src/ddsketch/proto/DDSketch.proto && pbts -o src/ddsketch/proto/compiled.d.ts src/ddsketch/proto/compiled.js"
},
"dependencies": {
"math-float64-frexp": "^1.0.0",
"math-float64-ldexp": "^1.0.1"
"math-float64-ldexp": "^1.0.1",
"protobufjs": "^6.10.2"
},
"devDependencies": {
"@types/jest": "^26.0.14",
Expand Down
64 changes: 64 additions & 0 deletions src/ddsketch/proto/DDSketch.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License 2.0.
* This product includes software developed at Datadog (https://www.datadoghq.com/).
* Copyright 2020 Datadog, Inc.
*/

syntax = "proto3";

// DDSketch is, in essence, a histogram. The range of positive values is split
// into infinitely many indexed bins of exponentially growing size, and the
// sketch records how many values (or possibly floating-point weights) were
// added to each bin. Negative values are binned the same way as positive
// ones, symmetrically with respect to zero. Zero itself, together with the
// nearby values that would otherwise map to extreme bin indexes, is tallied
// in a dedicated counter.
message DDSketch {
  // Mapping from positive values to the indexes of the bins that hold them.
  IndexMapping mapping = 1;

  // Store that tracks the positive values.
  Store positiveValues = 2;

  // Store that tracks the negative values. A negative value v is mapped
  // through its positive opposite, -v.
  Store negativeValues = 3;

  // Count for the value zero and its close neighborhood; the width of that
  // neighborhood depends on the mapping.
  double zeroCount = 4;
}

// Describes how positive values are assigned to the bins they belong to.
message IndexMapping {
  // How the logarithm is approximated when computing the index of a value.
  // The base-2 log of a power of 2 can be obtained cheaply from the binary
  // representation of the input value; the log of any other value can be
  // approximated by interpolating between successive powers of 2.
  enum Interpolation {
    // The log is computed exactly (no interpolation).
    NONE = 0;
    // Linear interpolation between successive powers of 2.
    LINEAR = 1;
    // Quadratic interpolation between successive powers of 2.
    QUADRATIC = 2;
    // Cubic interpolation between successive powers of 2.
    CUBIC = 3;
  }

  // Gamma parameter of the mapping: the index of the bin that a value v
  // belongs to is roughly log(v)/log(gamma).
  double gamma = 1;

  // Offset that can be used to shift all bin indexes.
  double indexOffset = 2;

  // Interpolation used to speed up the index computation; NONE requests the
  // exact log.
  Interpolation interpolation = 3;
}

// A Store associates bin indexes with their counts.
//
// Two encodings are available: a sparse one (binCounts) and a contiguous one
// (contiguousBinCounts together with contiguousBinIndexOffset). Because
// non-empty bins are, in practice, usually contiguous or close to one another,
// the contiguous encoding is usually the more efficient of the two.
//
// The two encodings may be combined. When a bin is present in both, its count
// is the sum of the counts found in each encoding.
message Store {
  // Sparse encoding: bin counts stored as individual map entries.
  map<sint32, double> binCounts = 1;

  // Contiguous encoding: the successive elements are the counts of the bins
  // of indexes o, o+1, o+2, etc., where o is contiguousBinIndexOffset.
  // Packed encoding is already the proto3 default for repeated numeric
  // scalars; the option is kept explicit here.
  repeated double contiguousBinCounts = 2 [packed = true];

  // The index o of the bin whose count is the first element of
  // contiguousBinCounts.
  sint32 contiguousBinIndexOffset = 3;
}
Loading

0 comments on commit a449654

Please sign in to comment.