Skip to content

Commit f0539c2

Browse files
committed
Add caching of sharding function
The ddl.bucket_id() function needs to know a sharding function. It is costly to obtain the function declaration / definition stored in the _ddl_sharding_func space. Cache contains: raw_tuple - raw sharding metadata, used for get_schema() parsed_func_name - parsed dot notation (like {'foo', 'bar'}) callable - function ready to call, this offloads using of loadstring() error - string with an error: not nil only if setting callable fails Cache will be rebuilded if: * _ddl_sharding_func space changed: cache sets _ddl_sharding_func:on_replace trigger * schema changed: cache checks box.internal.schema_version changes This patch does not serve hot reload techniques. This entails an on_replace trigger duplication if hot reload occurs. Hot reload support will be done in separate task: #87 Benchmarks - 10000000 bucket_id() calls (file test/bench_cache.lua): Baseline (no DDL): 3.38s user 0.01s system 99% cpu 3.389 total After this patch: DDL with function body: 3.81s user 0.01s system 99% cpu 3.818 total DDL with function name: 5.49s user 0.00s system 99% cpu 5.495 total Before patch: DDL with function body: 55.95s user 0.40s system 99% cpu 56.354 total DDL with function name: 13.68s user 0.13s system 99% cpu 13.807 total Closes #82
1 parent 4f0fbd1 commit f0539c2

File tree

5 files changed

+493
-13
lines changed

5 files changed

+493
-13
lines changed

ddl/cache.lua

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
local cache = nil
2+
3+
local SPACE_NAME_IDX = 1
4+
local SHARD_FUNC_NAME_IDX = 2
5+
local SHARD_FUNC_BODY_IDX = 3
6+
7+
-- Build cache.
8+
--
9+
-- Cache structure format:
10+
--
11+
-- cache = {
12+
-- spaces = {
13+
-- [space_name] = {
14+
-- -- raw sharding metadata, used for ddl.get()
15+
-- raw_tuple = <tuple object> (<nil> at error),
16+
-- -- parsed dot notation (like {'foo', 'bar'})
17+
-- parsed_func_name = <table> or <nil>
18+
-- -- a function ready to call
19+
-- callable = <function> or <nil>,
20+
-- -- string with an error: not nil only if setting callable fails
21+
-- error = <string> or <nil>,
22+
-- }
23+
-- },
24+
-- -- current schema version
25+
-- schema_version = <...>,
26+
-- }
27+
28+
-- function returns nothing
29+
local function cache_build()
30+
-- clear cache
31+
cache.spaces = {}
32+
33+
if box.space._ddl_sharding_func == nil then
34+
return
35+
end
36+
37+
for _, tuple in box.space._ddl_sharding_func:pairs() do
38+
local space_name = tuple[SPACE_NAME_IDX]
39+
local func_name = tuple[SHARD_FUNC_NAME_IDX]
40+
local func_body = tuple[SHARD_FUNC_BODY_IDX]
41+
42+
cache.spaces[space_name] = {
43+
raw_tuple = tuple
44+
}
45+
46+
if func_body ~= nil then
47+
local sharding_func, err = loadstring('return ' .. func_body)
48+
if sharding_func == nil then
49+
cache.spaces[space_name].error =
50+
string.format("Body is incorrect in sharding_func for space (%s): %s",
51+
space_name, err)
52+
else
53+
cache.spaces[space_name].callable = sharding_func()
54+
end
55+
elseif func_name ~= nil then
56+
-- we cannot save the function itself into the cache,
57+
-- because the function can be changed in runtime and
58+
-- there is no way to catch this change
59+
local chunks = string.split(func_name, '.')
60+
cache.spaces[space_name].parsed_func_name = chunks
61+
end
62+
end
63+
64+
cache.schema_version = box.internal.schema_version()
65+
66+
end
67+
68+
-- Rebuild cache if _ddl_sharding_func space changed.
69+
local function cache_set_trigger()
70+
if box.space._ddl_sharding_func == nil then
71+
return
72+
end
73+
74+
local trigger_found = false
75+
76+
for _, func in pairs(box.space._ddl_sharding_func:on_replace()) do
77+
if func == cache_build then
78+
trigger_found = true
79+
break
80+
end
81+
end
82+
83+
if not trigger_found then
84+
box.space._ddl_sharding_func:on_replace(cache_build)
85+
end
86+
end
87+
88+
-- Get data from cache.
89+
-- Returns all cached data for "space_name" or nil.
90+
local function cache_get(space_name)
91+
if space_name == nil then
92+
return nil
93+
end
94+
95+
-- using tarantool internal API.
96+
-- this is not reliable, but it is the only way to track
97+
-- schema_version changes. Fix it if a public method appears:
98+
-- https://github.com/tarantool/tarantool/issues/6544
99+
local schema_version = box.internal.schema_version()
100+
101+
if not cache then
102+
cache = {}
103+
box.atomic(cache_build)
104+
cache_set_trigger()
105+
end
106+
107+
-- rebuild cache if database schema changed
108+
if schema_version ~= cache.schema_version then
109+
box.atomic(cache_build)
110+
cache_set_trigger()
111+
end
112+
113+
return cache.spaces[space_name]
114+
end
115+
116+
return {
117+
internal = {
118+
get = cache_get,
119+
}
120+
}

ddl/get.lua

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
local utils = require('ddl.utils')
2+
local cache = require('ddl.cache')
23
local ddl_check = require('ddl.check')
34

45
local function _get_index_field_path(space, index_part)
@@ -66,11 +67,18 @@ local function get_metadata(space_name, metadata_name)
6667
end
6768

6869
local function get_sharding_func(space_name)
69-
local record = get_metadata(space_name, "sharding_func")
70-
if not record then
70+
return cache.internal.get(space_name)
71+
end
72+
73+
local function get_sharding_func_raw(space_name)
74+
local record = cache.internal.get(space_name)
75+
76+
if not record or not record.raw_tuple then
7177
return nil
7278
end
7379

80+
record = record.raw_tuple
81+
7482
if record.sharding_func_body ~= nil then
7583
return {body = record.sharding_func_body}
7684
end
@@ -97,7 +105,7 @@ local function get_space_schema(space_name)
97105
space_ddl.engine = box_space.engine
98106
space_ddl.format = box_space:format()
99107
space_ddl.sharding_key = get_sharding_key(space_name)
100-
space_ddl.sharding_func = get_sharding_func(space_name)
108+
space_ddl.sharding_func = get_sharding_func_raw(space_name)
101109
for _, field in ipairs(space_ddl.format) do
102110
if field.is_nullable == nil then
103111
field.is_nullable = false
@@ -115,21 +123,20 @@ local function get_space_schema(space_name)
115123
end
116124

117125
local function prepare_sharding_func_for_call(space_name, sharding_func_def)
118-
if type(sharding_func_def) == 'string' then
119-
local sharding_func = utils.get_G_function(sharding_func_def)
126+
if sharding_func_def.error ~= nil then
127+
return nil, sharding_func_def.error
128+
end
129+
130+
if sharding_func_def.parsed_func_name ~= nil then
131+
local sharding_func = utils.get_G_function(sharding_func_def.parsed_func_name)
120132
if sharding_func ~= nil and
121133
ddl_check.internal.is_callable(sharding_func) == true then
122134
return sharding_func
123135
end
124136
end
125137

126-
if type(sharding_func_def) == 'table' then
127-
local sharding_func, err = loadstring('return ' .. sharding_func_def.body)
128-
if sharding_func == nil then
129-
return nil, string.format(
130-
"Body is incorrect in sharding_func for space (%s): %s", space_name, err)
131-
end
132-
return sharding_func()
138+
if sharding_func_def.callable ~= nil then
139+
return sharding_func_def.callable
133140
end
134141

135142
return nil, string.format(

ddl/utils.lua

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,19 @@ end
189189
-- split sharding func name in dot notation by dot
190190
-- foo.bar.baz -> chunks: foo bar baz
191191
-- foo -> chunks: foo
192+
--
193+
-- func_name parameter may be a string in dot notation or table
194+
-- if func_name type is of type table it is assumed that it is already split
192195
local function get_G_function(func_name)
193-
local chunks = string.split(func_name, '.')
194196
local sharding_func = _G
197+
local chunks
198+
199+
if type(func_name) == 'string' then
200+
chunks = string.split(func_name, '.')
201+
else
202+
chunks = func_name
203+
end
204+
195205
-- check is the each chunk an identifier
196206
for _, chunk in pairs(chunks) do
197207
if not check_name_isident(chunk) or sharding_func == nil then

test/bench_cache.lua

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#!/usr/bin/env tarantool
2+
3+
local db = require('test.db')
4+
local ddl = require('ddl')
5+
local helper = require('test.helper')
6+
7+
local ITERS = 10000000
8+
9+
local function mpcrc32(shard_key)
10+
local digest = require('digest')
11+
if type(shard_key) ~= 'table' then
12+
return digest.crc32(tostring(shard_key))
13+
else
14+
local crc32 = digest.crc32.new()
15+
for _, v in ipairs(shard_key) do
16+
crc32:update(tostring(v))
17+
end
18+
return crc32:result()
19+
end
20+
end
21+
22+
local sharding_func_body = [[
23+
function(shard_key)
24+
local digest = require('digest')
25+
if type(shard_key) ~= 'table' then
26+
return digest.crc32(tostring(shard_key))
27+
else
28+
local crc32 = digest.crc32.new()
29+
for _, v in ipairs(shard_key) do
30+
crc32:update(tostring(v))
31+
end
32+
return crc32:result()
33+
end
34+
end
35+
]]
36+
37+
local primary_index = {
38+
type = 'HASH',
39+
unique = true,
40+
parts = {
41+
{path = 'string_nonnull', is_nullable = false, type = 'string'},
42+
{path = 'unsigned_nonnull', is_nullable = false, type = 'unsigned'},
43+
},
44+
name = 'primary'
45+
}
46+
47+
local bucket_id_idx = {
48+
type = 'TREE',
49+
unique = false,
50+
parts = {{path = 'bucket_id', type = 'unsigned', is_nullable = false}},
51+
name = 'bucket_id'
52+
}
53+
54+
local function space_init()
55+
db.drop_all()
56+
57+
local space = {
58+
engine = 'memtx',
59+
is_local = true,
60+
temporary = false,
61+
format = table.deepcopy(helper.test_space_format())
62+
}
63+
table.insert(space.format, 1, {
64+
name = 'bucket_id', type = 'unsigned', is_nullable = false
65+
})
66+
67+
space.indexes = {
68+
table.deepcopy(primary_index),
69+
table.deepcopy(bucket_id_idx)
70+
}
71+
space.sharding_key = {'unsigned_nonnull', 'integer_nonnull'}
72+
local schema = {
73+
spaces = {
74+
space = space,
75+
}
76+
}
77+
78+
return schema
79+
end
80+
81+
local function run_body()
82+
db.init()
83+
local schema = space_init()
84+
schema.spaces.space.sharding_func = {
85+
body = sharding_func_body
86+
}
87+
88+
local _, err = ddl.set_schema(schema)
89+
if err then
90+
print(err)
91+
os.exit()
92+
end
93+
94+
for i=1,ITERS do
95+
local _, err = ddl.bucket_id('space', i)
96+
if err then
97+
print(err)
98+
os.exit()
99+
end
100+
end
101+
102+
print("Done")
103+
os.exit()
104+
end
105+
106+
local function run_baseline()
107+
db.init()
108+
local schema = space_init()
109+
schema.spaces.space.sharding_func = {
110+
body = sharding_func_body
111+
}
112+
113+
local _, err = ddl.set_schema(schema)
114+
if err then
115+
print(err)
116+
os.exit()
117+
end
118+
119+
for i=1,ITERS do
120+
mpcrc32(i)
121+
if err then
122+
print(err)
123+
os.exit()
124+
end
125+
end
126+
127+
print("Done")
128+
os.exit()
129+
end
130+
131+
local function run_name()
132+
db.init()
133+
local schema = space_init()
134+
local sharding_func_name = 'mpcrc32'
135+
rawset(_G, sharding_func_name, mpcrc32)
136+
schema.spaces.space.sharding_func = sharding_func_name
137+
138+
local _, err = ddl.set_schema(schema)
139+
if err then
140+
print(err)
141+
os.exit()
142+
end
143+
144+
for i=1,ITERS do
145+
local _, err = ddl.bucket_id('space', i)
146+
if err then
147+
print(err)
148+
os.exit()
149+
end
150+
end
151+
152+
print("Done")
153+
os.exit()
154+
end
155+
156+
local function main()
157+
local getopt = require('posix.unistd').getopt
158+
159+
if #arg == 0 then
160+
print("Usage:", arg[0], "\n -l - baseline benchmark",
161+
"\n -n - function name caching benchmark",
162+
"\n -b - function body caching benchmark")
163+
os.exit()
164+
end
165+
166+
for opt, _, optind in getopt(arg, 'lnb') do
167+
if opt == '?' then
168+
return print('unrecognized option', arg[optind -1])
169+
end
170+
if opt == 'l' then
171+
run_baseline()
172+
elseif opt == 'n' then
173+
run_name()
174+
elseif opt == 'b' then
175+
run_body()
176+
end
177+
end
178+
end
179+
180+
main()

0 commit comments

Comments
 (0)