Skip to content

Commit 9b0fc46

Browse files
committed
Add caching of sharding function
The ddl.bucket_id() function needs to know the sharding function. It is costly to obtain the function declaration / definition stored in the _ddl_sharding_func space on every call, so it is now cached. The cache contains: raw_tuple - raw sharding metadata, used for get_schema(); parsed_func_name - parsed dot notation (like {'foo', 'bar'}); callable - a function ready to call, which avoids calling loadstring() on every bucket_id() call; error - string with an error: not nil only if setting callable fails. The cache is rebuilt if: * the _ddl_sharding_func space changed: the cache sets a _ddl_sharding_func:on_replace trigger; * the schema changed: the cache checks for box.internal.schema_version changes. This patch does not support hot reload: the on_replace trigger will be duplicated if a hot reload occurs. Hot reload support will be done in a separate task: #87 Benchmarks - 10000000 bucket_id() calls (file test/bench_cache.lua): Baseline (no DDL): 3.38s user 0.01s system 99% cpu 3.389 total After this patch: DDL with function body: 3.81s user 0.01s system 99% cpu 3.818 total DDL with function name: 5.49s user 0.00s system 99% cpu 5.495 total Before patch: DDL with function body: 55.95s user 0.40s system 99% cpu 56.354 total DDL with function name: 13.68s user 0.13s system 99% cpu 13.807 total Closes #82
1 parent 4f0fbd1 commit 9b0fc46

File tree

7 files changed

+518
-13
lines changed

7 files changed

+518
-13
lines changed

.github/workflows/test.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,31 @@ jobs:
6666
# Cleanup cached paths
6767
- run: tarantoolctl rocks remove cartridge
6868
- run: tarantoolctl rocks remove ddl
69+
benchmark:
70+
strategy:
71+
fail-fast: false
72+
matrix:
73+
tarantool: ['1.10', '2.5', '2.6', '2.7']
74+
coveralls: [false]
75+
include:
76+
- tarantool: '2.8'
77+
coveralls: true
78+
runs-on: [ubuntu-latest]
79+
steps:
80+
- uses: actions/checkout@v2
81+
- uses: tarantool/setup-tarantool@v1
82+
with:
83+
tarantool-version: ${{ matrix.tarantool }}
84+
85+
# Setup luatest
86+
- name: Cache rocks
87+
uses: actions/cache@v2
88+
id: cache-rocks
89+
with:
90+
path: .rocks/
91+
key: cache-rocks-${{ matrix.runs-on }}-06
92+
-
93+
run: tarantoolctl rocks install luatest
94+
if: steps.cache-rocks.outputs.cache-hit != 'true'
95+
96+
- run: tarantool ./test/bench_cache.lua

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ bucket identifier (number)
8989
- Calculate bucket id for a specified space and sharding key.
9090
Method uses sharding function specified in DDL schema.
9191

92+
Method is not transactional in the sense that it catches up
93+
_ddl_sharding_func changes immediately: it may see changes that are
94+
not committed yet and may see a state from another transaction,
95+
which should not be visible in the current transaction.
96+
9297
Return values: bucket_id if no error, otherwise return `nil, err`
9398

9499
## Input data format

ddl/cache.lua

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
local cache = nil
2+
3+
local SPACE_NAME_IDX = 1
4+
local SHARD_FUNC_NAME_IDX = 2
5+
local SHARD_FUNC_BODY_IDX = 3
6+
7+
-- Build cache.
8+
--
9+
-- We don't need to call this function with any type of locking:
10+
-- _ddl_sharding_func is memtx space, so calling :pairs() on it
11+
-- is atomic
12+
--
13+
-- Cache structure format:
14+
--
15+
-- cache = {
16+
-- spaces = {
17+
-- [space_name] = {
18+
-- -- raw sharding metadata, used for ddl.get()
19+
-- raw_tuple = <tuple object> (<nil> at error),
20+
-- -- parsed dot notation (like {'foo', 'bar'})
21+
-- parsed_func_name = <table> or <nil>
22+
-- -- a function ready to call
23+
-- callable = <function> or <nil>,
24+
-- -- string with an error: not nil only if setting callable fails
25+
-- error = <string> or <nil>,
26+
-- }
27+
-- },
28+
-- -- current schema version
29+
-- schema_version = <...>,
30+
-- }
31+
32+
-- function returns nothing
33+
-- Rebuild the module-level cache from the _ddl_sharding_func space.
--
-- Every previously cached entry is dropped. Then one record is stored
-- per tuple of the space: the raw tuple plus either a ready-to-call
-- value (when the tuple carries a function body) or the function name
-- split by dots (when it carries only a function name).
--
-- A body that fails to compile, or that raises while it is being
-- evaluated, does not raise from here: the failure is stored in the
-- `error` field of the record so that callers can report it.
--
-- The current schema version is recorded on every path, including the
-- one where the space does not exist, so an unchanged schema does not
-- trigger a rebuild on each lookup.
--
-- The function takes no parameters and returns nothing; it is also
-- registered as an on_replace trigger, in which case any arguments
-- passed by the trigger machinery are ignored.
local function cache_build()
    -- clear cache
    cache.spaces = {}

    local sharding_space = box.space._ddl_sharding_func
    if sharding_space == nil then
        -- nothing to cache yet; creating the space changes the schema
        -- version, which forces a rebuild later
        cache.schema_version = box.internal.schema_version()
        return
    end

    for _, tuple in sharding_space:pairs() do
        local space_name = tuple[SPACE_NAME_IDX]
        local func_name = tuple[SHARD_FUNC_NAME_IDX]
        local func_body = tuple[SHARD_FUNC_BODY_IDX]

        local record = {
            raw_tuple = tuple
        }
        cache.spaces[space_name] = record

        if func_body ~= nil then
            local chunk, err = loadstring('return ' .. func_body)
            local ok, result = false, err
            if chunk ~= nil then
                -- evaluating the chunk runs user-supplied code, which
                -- may raise even though it compiled successfully
                ok, result = pcall(chunk)
            end

            if ok then
                record.callable = result
            else
                record.error = string.format(
                    "Body is incorrect in sharding_func for space (%s): %s",
                    space_name, tostring(result))
            end
        elseif func_name ~= nil then
            -- we cannot save the function itself into the cache,
            -- because the function can be changed in runtime and
            -- there is no way to catch this change
            record.parsed_func_name = string.split(func_name, '.')
        end
    end

    cache.schema_version = box.internal.schema_version()
end
71+
72+
-- Rebuild cache if _ddl_sharding_func space changed.
73+
-- Make sure cache_build is registered as an on_replace trigger of the
-- _ddl_sharding_func space.
--
-- Does nothing when the space does not exist or when cache_build is
-- already among the triggers returned by on_replace().
-- Returns nothing.
local function cache_set_trigger()
    if box.space._ddl_sharding_func == nil then
        return
    end

    local installed_triggers = box.space._ddl_sharding_func:on_replace()
    for _, installed in pairs(installed_triggers) do
        if installed == cache_build then
            -- already registered, avoid adding a duplicate
            return
        end
    end

    box.space._ddl_sharding_func:on_replace(cache_build)
end
91+
92+
-- Get data from cache.
93+
-- Returns all cached data for "space_name" or nil.
94+
-- Look up the cached sharding function record of a space.
--
-- The cache is created and filled on the first call. It is refilled
-- whenever the schema version observed at the start of the call
-- differs from the one stored in the cache.
--
-- space_name: key to look up; nil yields nil right away.
-- Returns the cached record for space_name, or nil if there is none.
local function cache_get(space_name)
    if space_name == nil then
        return nil
    end

    -- box.internal is tarantool internal API. It is not reliable,
    -- but it is the only way to track schema_version changes.
    -- Switch to a public method once one appears:
    -- https://github.com/tarantool/tarantool/issues/6544
    local current_version = box.internal.schema_version()

    -- first use: create the cache and subscribe to space changes
    if cache == nil or cache == false then
        cache = {}
        cache_build()
        cache_set_trigger()
    end

    -- database schema changed since the cache was filled: refill it
    if cache.schema_version ~= current_version then
        cache_build()
        cache_set_trigger()
    end

    return cache.spaces[space_name]
end
119+
120+
return {
121+
internal = {
122+
get = cache_get,
123+
}
124+
}

ddl/get.lua

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
local utils = require('ddl.utils')
2+
local cache = require('ddl.cache')
23
local ddl_check = require('ddl.check')
34

45
local function _get_index_field_path(space, index_part)
@@ -66,11 +67,18 @@ local function get_metadata(space_name, metadata_name)
6667
end
6768

6869
local function get_sharding_func(space_name)
69-
local record = get_metadata(space_name, "sharding_func")
70-
if not record then
70+
return cache.internal.get(space_name)
71+
end
72+
73+
local function get_sharding_func_raw(space_name)
74+
local record = cache.internal.get(space_name)
75+
76+
if not record or not record.raw_tuple then
7177
return nil
7278
end
7379

80+
record = record.raw_tuple
81+
7482
if record.sharding_func_body ~= nil then
7583
return {body = record.sharding_func_body}
7684
end
@@ -97,7 +105,7 @@ local function get_space_schema(space_name)
97105
space_ddl.engine = box_space.engine
98106
space_ddl.format = box_space:format()
99107
space_ddl.sharding_key = get_sharding_key(space_name)
100-
space_ddl.sharding_func = get_sharding_func(space_name)
108+
space_ddl.sharding_func = get_sharding_func_raw(space_name)
101109
for _, field in ipairs(space_ddl.format) do
102110
if field.is_nullable == nil then
103111
field.is_nullable = false
@@ -115,21 +123,20 @@ local function get_space_schema(space_name)
115123
end
116124

117125
local function prepare_sharding_func_for_call(space_name, sharding_func_def)
118-
if type(sharding_func_def) == 'string' then
119-
local sharding_func = utils.get_G_function(sharding_func_def)
126+
if sharding_func_def.error ~= nil then
127+
return nil, sharding_func_def.error
128+
end
129+
130+
if sharding_func_def.parsed_func_name ~= nil then
131+
local sharding_func = utils.get_G_function(sharding_func_def.parsed_func_name)
120132
if sharding_func ~= nil and
121133
ddl_check.internal.is_callable(sharding_func) == true then
122134
return sharding_func
123135
end
124136
end
125137

126-
if type(sharding_func_def) == 'table' then
127-
local sharding_func, err = loadstring('return ' .. sharding_func_def.body)
128-
if sharding_func == nil then
129-
return nil, string.format(
130-
"Body is incorrect in sharding_func for space (%s): %s", space_name, err)
131-
end
132-
return sharding_func()
138+
if sharding_func_def.callable ~= nil then
139+
return sharding_func_def.callable
133140
end
134141

135142
return nil, string.format(

ddl/utils.lua

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,19 @@ end
189189
-- split sharding func name in dot notation by dot
190190
-- foo.bar.baz -> chunks: foo bar baz
191191
-- foo -> chunks: foo
192+
--
193+
-- func_name parameter may be a string in dot notation or table
194+
-- if func_name is a table, it is assumed to be already split
192195
local function get_G_function(func_name)
193-
local chunks = string.split(func_name, '.')
194196
local sharding_func = _G
197+
local chunks
198+
199+
if type(func_name) == 'string' then
200+
chunks = string.split(func_name, '.')
201+
else
202+
chunks = func_name
203+
end
204+
195205
-- check that each chunk is an identifier
196206
for _, chunk in pairs(chunks) do
197207
if not check_name_isident(chunk) or sharding_func == nil then

0 commit comments

Comments
 (0)