Skip to content

Commit 26920c2

Browse files
committed
Add UUID v4 and v7. Make v7 the default. Optimize UUID generation.
Add v4 and v7 UUIDs as per RFC9562 [1]. Use v7 as the new default instead of the previous `sequential`. For v4 and v7 UUIDs add the `[uuids] format` setting to let users control the encoding. These formats are supported: * `base_16` : (aka hex) encoding. This is the default and is the same as the previous (`sequential`) format. * `base_36` : Base 36 encoding using 0-9,a-z characters. This is the most compact encoding, only 25 characters long. * `rfc9562` : The standard RFC9562 UUID encoding, as a hex string with dashes. The UUID v4, v7 along with the old `random` UUID types can skip calling the `gen_server` since they do not need to keep any state. This should make the system more robust and prevent clients from overwhelming the node with concurrent requests to the `/_uuids` endpoint. While at it, expand the test suite to add more coverage and also make sure to test the sizes of the returned values. [1] https://datatracker.ietf.org/doc/rfc9562
1 parent 825b216 commit 26920c2

File tree

4 files changed

+259
-75
lines changed

4 files changed

+259
-75
lines changed

rel/overlay/etc/default.ini

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,12 +615,23 @@ partitioned||* = true
615615
; First 14 characters are the time in hex. Last 18 are random.
616616
; utc_id - Time since Jan 1, 1970 UTC with microseconds, plus utc_id_suffix string
617617
; First 14 characters are the time in hex. uuids/utc_id_suffix string value is appended to these.
618-
;algorithm = sequential
618+
; uuid_v7 - UUID v7. The returned format depends on the [uuids] format setting
619+
; uuid_v4 - UUID v4. The returned format depends on the [uuids] format setting
620+
;algorithm = uuid_v7
619621

620622
; The utc_id_suffix value will be appended to uuids generated by the utc_id algorithm.
621623
; Replicating instances should have unique utc_id_suffix values to ensure uniqueness of utc_id ids.
622624
;utc_id_suffix =
623625

626+
; Encoding format for UUID v4 and v7. The default is base_16 (aka hex). Other
627+
; accepted formats are "base_36" and "rfc9562". Only the v4 and v7 algorithms have
628+
; configurable formats, other algorithms only return base_16 encoded values.
629+
; Examples of the same UUID value encoded in all the supported formats:
630+
; "base_16" : "0199df3759297032b402c3e61fbbf88f"
631+
; "base_36" : "03eudcyamunnfraqdgmopx09b"
632+
; "rfc9562" : "0199df37-5929-7032-b402-c3e61fbbf88f"
633+
;format = base_16
634+
624635
; Maximum number of UUIDs retrievable from /_uuids in a single request
625636
;max_count = 1000
626637

src/couch/src/couch_uuids.erl

Lines changed: 107 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717

1818
-export([start/0, stop/0]).
1919
-export([new/0, random/0]).
20-
-export([v7_hex/0, v7_bin/0]).
20+
-export([v7_bin/0]).
2121
-export([init/1]).
2222
-export([handle_call/3, handle_cast/2, handle_info/2]).
2323

2424
% config_listener api
2525
-export([handle_config_change/5, handle_config_terminate/3]).
2626

27+
-define(DEFAULT_ALGORITHM, "uuid_v7").
2728
-define(RELISTEN_DELAY, 5000).
2829

2930
start() ->
@@ -33,7 +34,13 @@ stop() ->
3334
gen_server:cast(?MODULE, stop).
3435

3536
new() ->
36-
gen_server:call(?MODULE, create).
37+
% Some algorithms can bypass the gen_server
38+
case config_algorithm() of
39+
"random" -> random();
40+
"uuid_v4" -> v4();
41+
"uuid_v7" -> v7();
42+
_ -> gen_server:call(?MODULE, create)
43+
end.
3744

3845
% Return a random identifier: 16 bytes from crypto:strong_rand_bytes/1 passed
% through couch_util:to_hex_bin/1. No gen_server state is involved.
random() ->
    Bytes = crypto:strong_rand_bytes(16),
    couch_util:to_hex_bin(Bytes).
@@ -42,16 +49,12 @@ init([]) ->
4249
ok = config:listen_for_changes(?MODULE, nil),
4350
{ok, state()}.
4451

45-
handle_call(create, _From, random) ->
46-
{reply, random(), random};
47-
handle_call(create, _From, uuid_v7) ->
48-
{reply, v7_hex(), uuid_v7};
4952
handle_call(create, _From, {utc_random, ClockSeq}) ->
5053
{UtcRandom, NewClockSeq} = utc_random(ClockSeq),
5154
{reply, UtcRandom, {utc_random, NewClockSeq}};
5255
handle_call(create, _From, {utc_id, UtcIdSuffix, ClockSeq}) ->
53-
Now = os:timestamp(),
54-
{UtcId, NewClockSeq} = utc_suffix(UtcIdSuffix, ClockSeq, Now),
56+
OsMicros = micros_since_epoch(),
57+
{UtcId, NewClockSeq} = utc_suffix(UtcIdSuffix, ClockSeq, OsMicros),
5558
{reply, UtcId, {utc_id, UtcIdSuffix, NewClockSeq}};
5659
handle_call(create, _From, {sequential, Pref, Seq}) ->
5760
Result = ?l2b(Pref ++ io_lib:format("~6.16.0b", [Seq])),
@@ -109,10 +112,58 @@ v7_bin() ->
109112
<<RandA:12, RandB:62, _:6>> = crypto:strong_rand_bytes(10),
110113
<<MSec:48, 7:4, RandA:12, 2:2, RandB:62>>.
111114

112-
v7_hex() ->
113-
<<A:8/binary, B:4/binary, C:4/binary, D:4/binary, E:12/binary>> = couch_util:to_hex_bin(
114-
v7_bin()
115-
),
115+
%% UUID Version 4
116+
%% https://www.rfc-editor.org/rfc/rfc9562#name-uuid-version-4
117+
%%
118+
%% 0 1 2 3
119+
%% 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
120+
%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
121+
%% | random_a |
122+
%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
123+
%% | random_a | ver | random_b |
124+
%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
125+
%% |var| random_c |
126+
%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
127+
%% | random_c |
128+
%% +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
129+
%%
130+
%% ver = 0100 = 4
131+
%% var = 10 = 2
132+
%%
133+
% Build a 128-bit version 4 UUID as a raw binary. 16 random bytes are drawn
% and 122 of those bits are kept as random_a (48), random_b (12) and
% random_c (62); the remaining 6 bits are discarded to make room for the
% fixed version (4) and variant (2#10) fields.
v4_bin() ->
    <<RandA:48, RandB:12, RandC:62, _Unused:6>> = crypto:strong_rand_bytes(16),
    <<RandA:48, 4:4, RandB:12, 2:2, RandC:62>>.
136+
137+
% Generate a version 7 UUID and encode it using the "format" value from the
% "uuids" config section ("base_16" when the setting is absent).
v7() ->
    Encoding = config:get("uuids", "format", "base_16"),
    uuid_format(v7_bin(), Encoding).
141+
142+
% Generate a version 4 UUID and encode it using the "format" value from the
% "uuids" config section ("base_16" when the setting is absent).
v4() ->
    Encoding = config:get("uuids", "format", "base_16"),
    uuid_format(v4_bin(), Encoding).
146+
147+
% Encode a 128-bit UUID binary according to the format name (a string).
% Known names are "base_16", "base_36" and "rfc9562". Any other string
% raises error({unsupported_uuid_format, Format}). Input that is not exactly
% 128 bits, or a format that is not a list, fails with function_clause.
uuid_format(<<_:128>> = Uuid, "base_16") ->
    couch_util:to_hex_bin(Uuid);
uuid_format(<<_:128>> = Uuid, "base_36") ->
    encode_base36(Uuid);
uuid_format(<<_:128>> = Uuid, "rfc9562") ->
    rfc9562_format(Uuid);
uuid_format(<<_:128>>, Format) when is_list(Format) ->
    error({unsupported_uuid_format, Format}).
155+
156+
% Opt for a fixed width representation
157+
% 25 == length(integer_to_list(1 bsl 128 - 1, 36)).
158+
%
159+
% Encode a 128-bit value as a lowercase base 36 binary (digits 0-9, a-z).
% The result is always 25 bytes: shorter encodings are left-padded with $0.
% 25 digits is the longest base 36 rendering of a 128-bit integer, so the
% padding count can never be negative.
encode_base36(<<Int:128>>) ->
    Digits = string:lowercase(integer_to_list(Int, 36)),
    Padding = lists:duplicate(25 - length(Digits), $0),
    list_to_binary([Padding, Digits]).
163+
164+
% Render a 128-bit UUID binary in the dashed 8-4-4-4-12 text layout.
% The match below requires couch_util:to_hex_bin/1 to return exactly 32 bytes
% for the 16 byte input; anything else fails with badmatch.
rfc9562_format(<<_:128>> = Uuid) ->
    <<G1:8/binary, G2:4/binary, G3:4/binary, G4:4/binary, G5:12/binary>> =
        couch_util:to_hex_bin(Uuid),
    iolist_to_binary(lists:join($-, [G1, G2, G3, G4, G5])).
117168

118169
new_prefix() ->
@@ -122,37 +173,35 @@ inc() ->
122173
rand:uniform(16#ffd).
123174

124175
state() ->
125-
AlgoStr = config:get("uuids", "algorithm", "sequential"),
176+
AlgoStr = config_algorithm(),
126177
case couch_util:to_existing_atom(AlgoStr) of
127178
random ->
128179
random;
129180
utc_random ->
130-
ClockSeq = micros_since_epoch(os:timestamp()),
181+
ClockSeq = micros_since_epoch(),
131182
{utc_random, ClockSeq};
132183
utc_id ->
133-
ClockSeq = micros_since_epoch(os:timestamp()),
184+
ClockSeq = micros_since_epoch(),
134185
UtcIdSuffix = config:get("uuids", "utc_id_suffix", ""),
135186
{utc_id, UtcIdSuffix, ClockSeq};
136187
sequential ->
137188
{sequential, new_prefix(), inc()};
138189
uuid_v7 ->
139190
uuid_v7;
191+
uuid_v4 ->
192+
uuid_v4;
140193
Unknown ->
141194
throw({unknown_uuid_algorithm, Unknown})
142195
end.
143196

144-
micros_since_epoch({_, _, Micro} = Now) ->
145-
Nowish = calendar:now_to_universal_time(Now),
146-
Nowsecs = calendar:datetime_to_gregorian_seconds(Nowish),
147-
Then = calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}),
148-
(Nowsecs - Then) * 1000000 + Micro.
197+
% Current OS system time in microseconds, as reported by os:system_time/1.
% Used as the clock source for the utc_random and utc_id algorithms.
micros_since_epoch() ->
    os:system_time(microsecond).
149199

150200
utc_random(ClockSeq) ->
151201
Suffix = couch_util:to_hex(crypto:strong_rand_bytes(9)),
152-
utc_suffix(Suffix, ClockSeq, os:timestamp()).
202+
utc_suffix(Suffix, ClockSeq, micros_since_epoch()).
153203

154-
utc_suffix(Suffix, ClockSeq, Now) ->
155-
OsMicros = micros_since_epoch(Now),
204+
utc_suffix(Suffix, ClockSeq, OsMicros) when is_integer(OsMicros) ->
156205
NewClockSeq =
157206
if
158207
OsMicros =< ClockSeq ->
@@ -165,50 +214,70 @@ utc_suffix(Suffix, ClockSeq, Now) ->
165214
Prefix = io_lib:format("~14.16.0b", [NewClockSeq]),
166215
{list_to_binary(Prefix ++ Suffix), NewClockSeq}.
167216

217+
% Read the configured UUID algorithm name (a string) from the "uuids" config
% section, falling back to ?DEFAULT_ALGORITHM when the setting is absent.
config_algorithm() ->
    config:get("uuids", "algorithm", ?DEFAULT_ALGORITHM).
219+
168220
-ifdef(TEST).
169221

170222
-include_lib("eunit/include/eunit.hrl").
171223

172224
utc_id_time_does_not_advance_test() ->
173225
% Timestamp didn't advance but local clock sequence should and new UUIDs
174226
% should be generated
175-
Now = {0, 1, 2},
176-
ClockSeq0 = micros_since_epoch({3, 4, 5}),
177-
{UtcId0, ClockSeq1} = utc_suffix("", ClockSeq0, Now),
227+
ClockSeq0 = 345,
228+
{UtcId0, ClockSeq1} = utc_suffix("", ClockSeq0, 12),
178229
?assert(is_binary(UtcId0)),
179230
?assertEqual(ClockSeq0 + 1, ClockSeq1),
180-
{UtcId1, ClockSeq2} = utc_suffix("", ClockSeq1, Now),
231+
{UtcId1, ClockSeq2} = utc_suffix("", ClockSeq1, ClockSeq0),
181232
?assertNotEqual(UtcId0, UtcId1),
182233
?assertEqual(ClockSeq1 + 1, ClockSeq2).
183234

184235
utc_id_time_advanced_test() ->
185236
% Timestamp advanced, a new UUID generated and also the last clock sequence
186237
% is updated to that timestamp.
187-
Now0 = {0, 1, 2},
188-
ClockSeq0 = micros_since_epoch({3, 4, 5}),
189-
{UtcId0, ClockSeq1} = utc_suffix("", ClockSeq0, Now0),
238+
ClockSeq0 = 345,
239+
{UtcId0, ClockSeq1} = utc_suffix("", ClockSeq0, 12),
190240
?assert(is_binary(UtcId0)),
191241
?assertEqual(ClockSeq0 + 1, ClockSeq1),
192-
Now1 = {9, 9, 9},
193-
{UtcId1, ClockSeq2} = utc_suffix("", ClockSeq1, Now1),
242+
ClockSeq2 = 999,
243+
{UtcId1, ClockSeq3} = utc_suffix("", ClockSeq1, ClockSeq2),
194244
?assert(is_binary(UtcId1)),
195245
?assertNotEqual(UtcId0, UtcId1),
196-
?assertEqual(micros_since_epoch(Now1), ClockSeq2).
246+
?assertEqual(ClockSeq2, ClockSeq3).
197247

198248
utc_random_test_time_does_not_advance_test() ->
199-
{MSec, Sec, USec} = os:timestamp(),
200-
Future = {MSec + 10, Sec, USec},
201-
ClockSeqFuture = micros_since_epoch(Future),
249+
OsMicros = os:system_time(microsecond),
250+
ClockSeqFuture = OsMicros + 10_000_000,
202251
{UtcRandom, NextClockSeq} = utc_random(ClockSeqFuture),
203252
?assert(is_binary(UtcRandom)),
204253
?assertEqual(32, byte_size(UtcRandom)),
205254
?assertEqual(ClockSeqFuture + 1, NextClockSeq).
206255

207256
utc_random_test_time_advance_test() ->
208-
ClockSeqPast = micros_since_epoch({1, 1, 1}),
257+
ClockSeqPast = 111,
209258
{UtcRandom, NextClockSeq} = utc_random(ClockSeqPast),
210259
?assert(is_binary(UtcRandom)),
211260
?assertEqual(32, byte_size(UtcRandom)),
212-
?assert(NextClockSeq > micros_since_epoch({1000, 0, 0})).
261+
?assert(NextClockSeq > 1_000_000_000).
262+
263+
uuid_v7_test() ->
    Uuid = v7_bin(),
    % Every supported encoding has a fixed output size
    ExpectedSizes = [{"rfc9562", 36}, {"base_16", 32}, {"base_36", 25}],
    lists:foreach(
        fun({Format, Size}) ->
            ?assertEqual(Size, byte_size(uuid_format(Uuid, Format)))
        end,
        ExpectedSizes
    ),
    ?assertError({unsupported_uuid_format, "X"}, uuid_format(Uuid, "X")),
    % Generating many UUIDs in a row must not produce duplicates
    Count = 10_000,
    Collect = fun(_N, Seen) -> sets:add_element(v7_bin(), Seen) end,
    Unique = lists:foldl(Collect, couch_util:new_set(), lists:seq(1, Count)),
    ?assertEqual(Count, sets:size(Unique)).
272+
273+
uuid_v4_test() ->
    Bin = v4_bin(),
    % The version nibble must be 4 and the variant bits must be 2#10
    ?assertMatch(<<_:48, 4:4, _:12, 2:2, _:62>>, Bin),
    % Every supported encoding has a fixed output size
    ?assertEqual(36, byte_size(uuid_format(Bin, "rfc9562"))),
    ?assertEqual(32, byte_size(uuid_format(Bin, "base_16"))),
    ?assertEqual(25, byte_size(uuid_format(Bin, "base_36"))),
    ?assertError({unsupported_uuid_format, "X"}, uuid_format(Bin, "X")),
    % Uniqueness must be checked against the v4 generator itself; this
    % previously called v7_bin() and so never exercised v4_bin().
    Fun1 = fun(_, Acc) -> sets:add_element(v4_bin(), Acc) end,
    Set1 = lists:foldl(Fun1, couch_util:new_set(), lists:seq(1, 10_000)),
    ?assertEqual(10_000, sets:size(Set1)).
213282

214283
-endif.

0 commit comments

Comments
 (0)