Skip to content

Commit

Permalink
refactor: unroll varint encoding for more cases
Browse files Browse the repository at this point in the history
Also:
- start using stream_data for property testing
- Varint.encode/1 now returns a binary instead of iodata

Benchmarks show a slight increase in performance and a slight decrease in memory usage
  • Loading branch information
ahamez committed Feb 26, 2025
1 parent cc970a3 commit 9481770
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 34 deletions.
29 changes: 22 additions & 7 deletions lib/protox/varint.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,38 @@ defmodule Protox.Varint do

import Bitwise

@spec encode(integer) :: iodata
def encode(v) when v < 128,
@spec encode(integer) :: binary()
def encode(v) when v < 1 <<< 7,
do: <<v>>

def encode(v) when v < 16_384,
def encode(v) when v < 1 <<< 14,
do: <<1::1, v::7, v >>> 7>>

def encode(v) when v < 2_097_152,
def encode(v) when v < 1 <<< 21,
do: <<1::1, v::7, 1::1, v >>> 7::7, v >>> 14>>

def encode(v) when v < 268_435_456,
def encode(v) when v < 1 <<< 28,
do: <<1::1, v::7, 1::1, v >>> 7::7, 1::1, v >>> 14::7, v >>> 21>>

def encode(v) when v < 34_359_738_368,
def encode(v) when v < 1 <<< 35,
do: <<1::1, v::7, 1::1, v >>> 7::7, 1::1, v >>> 14::7, 1::1, v >>> 21::7, v >>> 28>>

def encode(v), do: [<<1::1, v::7>>, encode(v >>> 7)]
# Unrolled varint clauses. In each binary below, a `1::1, x::7` pair forms one
# output byte: a set continuation bit followed by a 7-bit payload group (the
# `::7` size keeps only the low 7 bits of `x`). The last segment has no explicit
# size, so it is a full byte; the guard keeps its value below 128, which leaves
# the continuation bit clear and terminates the varint.

# 6-byte form: five continued groups, then `v >>> 35` as the final byte.
def encode(v) when v < 1 <<< 42,
do:
<<1::1, v::7, 1::1, v >>> 7::7, 1::1, v >>> 14::7, 1::1, v >>> 21::7, 1::1, v >>> 28::7,
v >>> 35>>

# 7-byte form: six continued groups, then `v >>> 42` as the final byte.
def encode(v) when v < 1 <<< 49,
do:
<<1::1, v::7, 1::1, v >>> 7::7, 1::1, v >>> 14::7, 1::1, v >>> 21::7, 1::1, v >>> 28::7,
1::1, v >>> 35::7, v >>> 42>>

# 8-byte form: seven continued groups, then `v >>> 49` as the final byte.
def encode(v) when v < 1 <<< 56,
do:
<<1::1, v::7, 1::1, v >>> 7::7, 1::1, v >>> 14::7, 1::1, v >>> 21::7, 1::1, v >>> 28::7,
1::1, v >>> 35::7, 1::1, v >>> 42::7, v >>> 49>>

# Fallback for values not matched by any guarded clause: emit the low 7-bit
# group with the continuation bit set, then splice in the encoding of the
# remaining bits. The `::binary` segment embeds the recursive result directly,
# so the overall return value is a flat binary rather than a nested list.
def encode(v), do: <<1::1, v::7, encode(v >>> 7)::binary>>

@spec decode(binary) :: {non_neg_integer, binary}
def decode(b), do: do_decode(0, 0, b)
Expand Down
3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ defmodule Protox.Mixfile do
{:dialyxir, "~> 1.0", only: [:test, :dev], runtime: false},
{:excoveralls, "~> 0.13", only: [:test], runtime: false},
{:ex_doc, "~> 0.22", only: [:dev], runtime: false},
{:propcheck, github: "alfert/propcheck", ref: "c564e89d", only: [:test, :dev]}
{:propcheck, github: "alfert/propcheck", ref: "c564e89d", only: [:test, :dev]},
{:stream_data, "~> 1.0", only: [:dev, :test], runtime: false}
]
|> maybe_add_muzak_pro()
|> maybe_download_protobuf()
Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@
"proper": {:git, "https://github.com/proper-testing/proper.git", "a5ae5669f01143b0828fc21667d4f5e344aa760b", [ref: "a5ae5669f01143b0828fc21667d4f5e344aa760b"]},
"protobuf": {:git, "https://github.com/protocolbuffers/protobuf.git", "b407e8416e3893036aee5af9a12bd9b6a0e2b2e6", [tag: "v29.3", submodules: true]},
"statistex": {:hex, :statistex, "1.0.0", "f3dc93f3c0c6c92e5f291704cf62b99b553253d7969e9a5fa713e5481cd858a5", [:mix], [], "hexpm", "ff9d8bee7035028ab4742ff52fc80a2aa35cece833cf5319009b52f1b5a86c27"},
"stream_data": {:hex, :stream_data, "1.1.2", "05499eaec0443349ff877aaabc6e194e82bda6799b9ce6aaa1aadac15a9fdb4d", [:mix], [], "hexpm", "129558d2c77cbc1eb2f4747acbbea79e181a5da51108457000020a906813a1a9"},
}
65 changes: 39 additions & 26 deletions test/protox/varint_test.exs
Original file line number Diff line number Diff line change
@@ -1,41 +1,54 @@
defmodule Protox.VarintTest do
use ExUnit.Case
use PropCheck
import Bitwise
use ExUnit.Case, async: true
use ExUnitProperties

# Differential check: for every generated integer in 0..(1 <<< 64), the unrolled
# Protox.Varint.encode/1 must produce the same bytes as encode_reference/1.
# Both sides go through IO.iodata_to_binary/1 so that the nested list built by
# the reference implementation can be compared with `==`.
property "Unrolled encoding produces the same result as the reference implementation" do
check all(int <- integer(0..(1 <<< 64))) do
assert int |> Protox.Varint.encode() |> IO.iodata_to_binary() ==
int |> encode_reference() |> IO.iodata_to_binary()
end
end

# Round-trip check: decoding the encoded form of a generated integer must return
# that same integer paired with an empty remainder, i.e. decode/1 consumes
# exactly the bytes that encode/1 produced.
property "Symmetric" do
check all(int <- integer(0..(1 <<< 64))) do
assert int |> Protox.Varint.encode() |> IO.iodata_to_binary() |> Protox.Varint.decode() ==
{int, ""}
end
end

test "Encode" do
assert Protox.Varint.encode(0) == <<0>>
assert Protox.Varint.encode(1) == <<1>>

assert Protox.Varint.encode(300) == <<172, 2>>
assert Protox.Varint.encode((1 <<< 14) - 1) == <<0xFF, 0x7F>>
assert Protox.Varint.encode(1 <<< 14) == <<0x80, 0x80, 0x1>>

assert Protox.Varint.encode(16_383) == <<0xFF, 0x7F>>
assert Protox.Varint.encode(16_384) == <<0x80, 0x80, 0x1>>
assert Protox.Varint.encode((1 <<< 21) - 1) == <<0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(1 <<< 21) == <<0x80, 0x80, 0x80, 0x1>>

assert Protox.Varint.encode(2_097_151) == <<0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(2_097_152) == <<0x80, 0x80, 0x80, 0x1>>
assert Protox.Varint.encode((1 <<< 28) - 1) == <<0xFF, 0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(1 <<< 28) == <<0x80, 0x80, 0x80, 0x80, 0x1>>

assert Protox.Varint.encode(268_435_455) == <<0xFF, 0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(268_435_456) == <<0x80, 0x80, 0x80, 0x80, 0x1>>
assert Protox.Varint.encode((1 <<< 35) - 1) == <<0xFF, 0xFF, 0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(1 <<< 35) == <<0x80, 0x80, 0x80, 0x80, 0x80, 0x1>>

assert Protox.Varint.encode(34_359_738_367) == <<0xFF, 0xFF, 0xFF, 0xFF, 0x7F>>
end
assert Protox.Varint.encode((1 <<< 42) - 1) == <<0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F>>
assert Protox.Varint.encode(1 <<< 42) == <<0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1>>

test "Decode" do
assert Protox.Varint.decode(<<172, 2>>) == {300, <<>>}
assert Protox.Varint.decode(<<172, 2, 0>>) == {300, <<0>>}
assert Protox.Varint.decode(<<0>>) == {0, <<>>}
assert Protox.Varint.decode(<<1>>) == {1, <<>>}
assert Protox.Varint.decode(<<185, 96>>) == {12_345, <<>>}
assert Protox.Varint.decode(<<185, 224, 0>>) == {12_345, <<>>}
end
assert Protox.Varint.encode((1 <<< 56) - 1) ==
<<0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F>>

@tag :properties
test "Symmetric" do
forall value <- integer() do
encoded = value |> Protox.Varint.encode() |> :binary.list_to_bin()
{decoded, <<>>} = Protox.Varint.decode(encoded)
assert Protox.Varint.encode(1 <<< 56) ==
<<0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1>>

value == decoded
end
assert Protox.Varint.encode((1 <<< 63) - 1) ==
<<0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F>>

assert Protox.Varint.encode(1 <<< 63) ==
<<0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x1>>
end

# Straightforward recursive varint encoder used as the oracle for the unrolled
# implementation. Returns iodata: a single-byte binary for values below 128,
# otherwise a two-element list of the low group and the encoding of the rest.
defp encode_reference(v) when v >= 1 <<< 7 do
  # Continuation bit set, followed by the low 7 bits of v.
  low_group = <<1::1, v::7>>
  [low_group, encode_reference(v >>> 7)]
end

defp encode_reference(v), do: <<v>>
end

0 comments on commit 9481770

Please sign in to comment.