Skip to content

Commit 4342785

Browse files
committed
Prototype: change credit scaling, using nominal vs physical credit
Previously we've had a somewhat complex method to scale credits from those supplied in a level (1 per update) to the credits used in merging. The existing scheme has a number of downsides: * no clear distinction between scaled and unscaled credits, what does each one measure? * complex, needs scaling dependent of the merge-policy * always uses worst case supply of credits so often finishes early * rounding errors compound problem of credit over-supply * no satisfactory way to check we do not over-supply credits * contributing credits to a merge from multiple handles will cause the merge to finish earlier than any of the handles needs it to finish, thus doing work more eagerly than necessary. The new scheme involves: * distinguishing physical vs nominal credits, with a clear definition for each measure * converting between nominal and physical without merge policy * physical debt is no longer worst case but matches actual cost * integer rounding errors do not compound * we can assert that we reach the nominal and physical debt totals at the same moment (when the merge is done) and thus we can assert no over-supply of physical credits. * we can avoid supplying credits too quickly to a merge that is shared between multiple handles, each one only pushes it as far as it needs.
1 parent 6729bad commit 4342785

File tree

2 files changed

+123
-67
lines changed

2 files changed

+123
-67
lines changed

prototypes/ScheduledMerges.hs

Lines changed: 120 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ module ScheduledMerges (
6363
LevelMergeType(..),
6464
MergeCredit(..),
6565
MergeDebt(..),
66+
NominalCredit(..),
67+
NominalDebt(..),
6668
Run,
6769
runSize,
6870
supplyCreditsMergingTree,
@@ -123,7 +125,9 @@ data Level s = Level !(IncomingRun s) ![Run]
123125
-- single run without having to read the 'STRef', and secondly to make it easier
124126
-- to avoid supplying merge credits. It's not essential, but simplifies things
125127
-- somewhat.
126-
data IncomingRun s = Merging !MergePolicy !(MergingRun LevelMergeType s)
128+
data IncomingRun s = Merging !MergePolicy
129+
!NominalDebt !(STRef s NominalCredit)
130+
!(MergingRun LevelMergeType s)
127131
| Single !Run
128132

129133
-- | The merge policy for a LSM level can be either tiering or levelling.
@@ -321,7 +325,7 @@ invariant (LSMContent _ levels ul) = do
321325
mrs <- case ir of
322326
Single r ->
323327
return (CompletedMerge r)
324-
Merging mp (MergingRun mt _ ref) -> do
328+
Merging mp _ _ (MergingRun mt _ ref) -> do
325329
assertST $ mp == mergePolicyForLevel ln ls ul
326330
&& mt == mergeTypeForLevel ls ul
327331
readSTRef ref
@@ -520,7 +524,9 @@ assertST p = assert p $ return ()
520524
--
521525

522526
-- | Credits for keeping track of merge progress. These credits
523-
-- correspond directly to merge steps performed.
527+
-- correspond directly to merge steps performed. We also call these \"physical\"
528+
-- credits (since they correspond to steps done), and as opposed to \"nominal\"
529+
-- credits in 'NominalCredit' and 'NominalDebt'.
524530
type Credit = Int
525531

526532
-- | Debt for keeping track of the total merge work to do.
@@ -586,6 +592,8 @@ paydownMergeDebt :: MergeDebt -> MergeCredit -> Credit -> MergeDebtPaydown
586592
paydownMergeDebt MergeDebt {totalDebt}
587593
MergeCredit {spentCredits, unspentCredits}
588594
c
595+
| assert (c >= 0) False = undefined
596+
589597
| let !suppliedCredits' = suppliedCredits + c
590598
, suppliedCredits' >= totalDebt
591599
, let !leftover = suppliedCredits' - totalDebt
@@ -633,19 +641,18 @@ mergeBatchSize = 32
633641
-- Merging run abstraction
634642
--
635643

636-
newMergingRun :: IsMergeType t => Maybe Debt -> t -> [Run] -> ST s (MergingRun t s)
637-
newMergingRun mdebt mergeType runs = do
644+
newMergingRun :: IsMergeType t => t -> [Run] -> ST s (MergingRun t s)
645+
newMergingRun mergeType runs = do
638646
assertST $ length runs > 1
639647
-- in some cases, no merging is required at all
640648
(debt, state) <- case filter (\r -> runSize r > 0) runs of
641649
[] -> let (r:_) = runs -- just re-use the empty input
642650
in return (runSize r, CompletedMerge r)
643651
[r] -> return (runSize r, CompletedMerge r)
644652
rs -> do
645-
let !cost = sum (map runSize rs)
646-
!debt = case mdebt of
647-
Nothing -> cost
648-
Just d -> assert (d >= cost) d
653+
-- The (physical) debt is always exactly the cost (merge steps),
654+
-- which is the sum of run lengths in elements.
655+
let !debt = sum (map runSize rs)
649656
let merged = mergek mergeType rs -- deliberately lazy
650657
return (debt, OngoingMerge zeroMergeCredit rs merged)
651658
MergingRun mergeType (MergeDebt debt) <$> newSTRef state
@@ -708,6 +715,15 @@ supplyCreditsMergingRun =
708715
writeSTRef ref (OngoingMerge mergeCredit' rs r)
709716
return 0
710717

718+
suppliedCreditMergingRun :: MergingRun t s -> ST s Credit
719+
suppliedCreditMergingRun (MergingRun _ d ref) =
720+
readSTRef ref >>= \case
721+
CompletedMerge{} ->
722+
let MergeDebt { totalDebt } = d in
723+
return totalDebt
724+
OngoingMerge MergeCredit {spentCredits, unspentCredits} _ _ ->
725+
return (spentCredits + unspentCredits)
726+
711727
-------------------------------------------------------------------------------
712728
-- LSM handle
713729
--
@@ -750,7 +766,7 @@ update tr (LSMHandle scr lsmr) k op = do
750766
sc <- readSTRef scr
751767
content@(LSMContent wb ls unionLevel) <- readSTRef lsmr
752768
modifySTRef' scr (+1)
753-
supplyCreditsLevels 1 ls
769+
supplyCreditsLevels (NominalCredit 1) ls
754770
invariant content
755771
let wb' = Map.insertWith combine k op wb
756772
if bufferSize wb' >= maxBufferSize
@@ -762,7 +778,7 @@ update tr (LSMHandle scr lsmr) k op = do
762778
else
763779
writeSTRef lsmr (LSMContent wb' ls unionLevel)
764780

765-
supplyMergeCredits :: LSM s -> Credit -> ST s ()
781+
supplyMergeCredits :: LSM s -> NominalCredit -> ST s ()
766782
supplyMergeCredits (LSMHandle scr lsmr) credits = do
767783
content@(LSMContent _ ls _) <- readSTRef lsmr
768784
modifySTRef' scr (+1)
@@ -965,48 +981,79 @@ lookupsTree k = go
965981
lookupBatch' = lookupBatch Nothing k
966982

967983
-------------------------------------------------------------------------------
968-
-- Updates
984+
-- Nominal credits
969985
--
970986

987+
newtype NominalCredit = NominalCredit Credit
988+
deriving stock Show
989+
990+
newtype NominalDebt = NominalDebt Credit
991+
deriving stock Show
992+
971993
-- TODO: If there is a UnionLevel, there is no (more expensive) last level merge
972994
-- in the regular levels, so a little less merging work is required than if
973995
-- there was no UnionLevel. It might be a good idea to spend this "saved" work
974996
-- on the UnionLevel instead. This makes future lookups cheaper and ensures that
975997
-- we can get rid of the UnionLevel at some point, even if a user just keeps
976998
-- inserting without calling 'supplyUnionCredits'.
977-
supplyCreditsLevels :: Credit -> Levels s -> ST s ()
978-
supplyCreditsLevels unscaled =
999+
supplyCreditsLevels :: NominalCredit -> Levels s -> ST s ()
1000+
supplyCreditsLevels nominalDeposit =
9791001
traverse_ $ \(Level ir _rs) -> do
9801002
case ir of
9811003
Single{} -> return ()
982-
Merging mp mr -> do
983-
factor <- creditsForMerge mp mr
984-
let credits = ceiling (fromIntegral unscaled * factor)
985-
when (credits > 0) $ do
986-
_ <- supplyCreditsMergingRun credits mr
987-
-- we don't mind leftover credits, each level completes independently
988-
return ()
989-
990-
-- | The general case (and thus worst case) of how many merge credits we need
991-
-- for a level. This is based on the merging policy at the level.
992-
--
993-
creditsForMerge :: MergePolicy -> MergingRun t s -> ST s Rational
994-
995-
-- A levelling merge has 1 input run and one resident run, which is (up to) 4x
996-
-- bigger than the others.
997-
-- It needs to be completed before another run comes in.
998-
creditsForMerge MergePolicyLevelling _ =
999-
return $ (1 + 4) / 1
1004+
Merging _mp nominalDebt nominalCreditVar
1005+
mr@(MergingRun _ physicalDebt _) -> do
1006+
1007+
nominalCredit <- depositNominalCredit
1008+
nominalDebt nominalCreditVar nominalDeposit
1009+
physicalCredit <- suppliedCreditMergingRun mr
1010+
let !physicalCredit' = scaleNominalToPhysicalCredit
1011+
nominalDebt physicalDebt nominalCredit
1012+
-- Our target physicalCredit' could actually be less than the
1013+
-- actual current physicalCredit if other tables were contributing
1014+
-- credits to the shared merge.
1015+
!physicalDeposit = physicalCredit' - physicalCredit
1016+
1017+
-- So we may have a zero or negative deposit, which we ignore.
1018+
when (physicalDeposit > 0) $ do
1019+
leftoverCredits <- supplyCreditsMergingRun physicalDeposit mr
1020+
-- For merges at ordinary levels (not unions) we expect to hit the
1021+
-- debt limit exactly and never exceed it.
1022+
assert (leftoverCredits == 0) $ return ()
1023+
1024+
scaleNominalToPhysicalCredit ::
1025+
NominalDebt
1026+
-> MergeDebt
1027+
-> NominalCredit
1028+
-> Credit
1029+
scaleNominalToPhysicalCredit (NominalDebt nominalDebt)
1030+
MergeDebt { totalDebt = physicalDebt }
1031+
(NominalCredit nominalCredit) =
1032+
floor $ toRational nominalCredit * toRational physicalDebt
1033+
/ toRational nominalDebt
1034+
-- This specification using Rational as an intermediate representation can
1035+
-- be implemented efficiently using only integer operations.
1036+
1037+
depositNominalCredit ::
1038+
NominalDebt
1039+
-> STRef s NominalCredit
1040+
-> NominalCredit
1041+
-> ST s NominalCredit
1042+
depositNominalCredit (NominalDebt nominalDebt)
1043+
nominalCreditVar
1044+
(NominalCredit deposit) = do
1045+
NominalCredit before <- readSTRef nominalCreditVar
1046+
-- Depositing _could_ leave the credit higher than the debt, because
1047+
-- sometimes under-full runs mean we don't shuffle runs down the levels
1048+
-- as quickly as the worst case. So here we do just drop excess nominal
1049+
-- credits.
1050+
let !after = NominalCredit (min (before + deposit) nominalDebt)
1051+
writeSTRef nominalCreditVar after
1052+
return after
10001053

1001-
-- A tiering merge has 5 runs at most (once could be held back to merged again)
1002-
-- and must be completed before the level is full (once 4 more runs come in).
1003-
creditsForMerge MergePolicyTiering (MergingRun _ _ ref) = do
1004-
readSTRef ref >>= \case
1005-
CompletedMerge _ -> return 0
1006-
OngoingMerge _ rs _ -> do
1007-
let numRuns = length rs
1008-
assertST $ numRuns `elem` [4, 5]
1009-
return $ fromIntegral numRuns / 4
1054+
-------------------------------------------------------------------------------
1055+
-- Updates
1056+
--
10101057

10111058
increment :: forall s. Tracer (ST s) Event
10121059
-> Counter -> Run -> Levels s -> UnionLevel s -> ST s (Levels s)
@@ -1028,7 +1075,7 @@ increment tr sc run0 ls0 ul = do
10281075
go !ln incoming (Level ir rs : ls) = do
10291076
r <- case ir of
10301077
Single r -> return r
1031-
Merging mergePolicy mr -> do
1078+
Merging mergePolicy _ _ mr -> do
10321079
r <- expectCompletedMergingRun mr
10331080
traceWith tr' MergeCompletedEvent {
10341081
mergePolicy,
@@ -1087,26 +1134,37 @@ newLevelMerge :: Tracer (ST s) EventDetail
10871134
-> [Run] -> ST s (IncomingRun s)
10881135
newLevelMerge _ _ _ _ [r] = return (Single r)
10891136
newLevelMerge tr level mergePolicy mergeType rs = do
1137+
assertST (length rs `elem` [4, 5])
1138+
mergingRun@(MergingRun _ physicalDebt _) <- newMergingRun mergeType rs
1139+
assertST (totalDebt physicalDebt <= maxPhysicalDebt)
10901140
traceWith tr MergeStartedEvent {
10911141
mergePolicy,
10921142
mergeType,
1093-
mergeDebt = debt,
1143+
mergeDebt = totalDebt physicalDebt,
10941144
mergeRunsSize = map runSize rs
10951145
}
1096-
assertST (length rs `elem` [4, 5])
1097-
Merging mergePolicy <$> newMergingRun (Just debt) mergeType rs
1146+
nominalCreditVar <- newSTRef (NominalCredit 0)
1147+
pure (Merging mergePolicy nominalDebt nominalCreditVar mergingRun)
10981148
where
1099-
-- How much we need to discharge before the merge can be guaranteed
1100-
-- complete. More precisely, this is the maximum amount a merge at this
1101-
-- level could need. While the real @cost@ of a merge would lead to merges
1102-
-- finishing early, the overestimation @debt@ means that in this prototype
1103-
-- merges will only complete at the last possible moment.
1104-
-- Note that for levelling this is includes the single run in the current
1105-
-- level.
1106-
debt = case mergePolicy of
1107-
MergePolicyLevelling -> 4 * tieringRunSize (level-1)
1108-
+ levellingRunSize level
1109-
MergePolicyTiering -> length rs * tieringRunSize (level-1)
1149+
-- The nominal debt equals the minimum of credits we will supply before we
1150+
-- expect the merge to complete. This is the same as the number of updates.
1151+
nominalDebt = NominalDebt (tieringRunSize level)
1152+
1153+
-- The physical debt is the number of actual merge steps we will need to
1154+
-- perform before the merge is complete. This is always the sum of the
1155+
-- lengths of the input runs.
1156+
--
1157+
-- As we supply nominal credit, we scale them and supply physical credits,
1158+
-- such that we pay off the physical and nominal debts at the same time.
1159+
--
1160+
-- We can bound the worst case physical debt: this is the maximum amount of
1161+
-- steps a merge at this level could need. Note that for levelling this is
1162+
-- includes the single run in the current level.
1163+
maxPhysicalDebt =
1164+
case mergePolicy of
1165+
MergePolicyLevelling -> 4 * tieringRunSize (level-1)
1166+
+ levellingRunSize level
1167+
MergePolicyTiering -> length rs * tieringRunSize (level-1)
11101168

11111169
-- | Only based on run count, not their sizes.
11121170
tieringLevelIsFull :: Int -> [Run] -> [Run] -> Bool
@@ -1172,8 +1230,8 @@ newPendingLevelMerge irs tree = do
11721230
st = PendingTreeMerge (PendingLevelMerge prs tree)
11731231
Just . MergingTree <$> newSTRef st
11741232
where
1175-
incomingToPreExistingRun (Single r) = PreExistingRun r
1176-
incomingToPreExistingRun (Merging _ mr) = PreExistingMergingRun mr
1233+
incomingToPreExistingRun (Single r) = PreExistingRun r
1234+
incomingToPreExistingRun (Merging _ _ _ mr) = PreExistingMergingRun mr
11771235

11781236
-- | Ensures that the merge contains more than one input.
11791237
newPendingUnionMerge :: [MergingTree s] -> ST s (Maybe (MergingTree s))
@@ -1286,12 +1344,10 @@ supplyCreditsMergingTreeState credits !state = do
12861344
else do
12871345
-- all children must be done, create new merge!
12881346
(mergeType, rs) <- expectCompletedChildren pm
1289-
-- no reason to claim a larger debt than sum of run sizes
1290-
let debt = Nothing
12911347
case rs of
12921348
[r] -> return (c', CompletedTreeMerge r)
12931349
_ -> do
1294-
state' <- OngoingTreeMerge <$> newMergingRun debt mergeType rs
1350+
state' <- OngoingTreeMerge <$> newMergingRun mergeType rs
12951351
-- use any remaining credits to progress the new merge
12961352
supplyCreditsMergingTreeState c' state'
12971353

@@ -1368,8 +1424,8 @@ flattenLevel (Level ir rs) = (++ rs) <$> flattenIncomingRun ir
13681424

13691425
flattenIncomingRun :: IncomingRun s -> ST s [Run]
13701426
flattenIncomingRun = \case
1371-
Single r -> return [r]
1372-
Merging _ mr -> flattenMergingRun mr
1427+
Single r -> return [r]
1428+
Merging _ _ _ mr -> flattenMergingRun mr
13731429

13741430
flattenMergingRun :: MergingRun t s -> ST s [Run]
13751431
flattenMergingRun (MergingRun _ _ ref) = do
@@ -1431,7 +1487,7 @@ dumpRepresentation (LSMHandle _ lsmr) = do
14311487
dumpLevel :: Level s -> ST s LevelRepresentation
14321488
dumpLevel (Level (Single r) rs) =
14331489
return (Nothing, (r:rs))
1434-
dumpLevel (Level (Merging mp (MergingRun mt _ ref)) rs) = do
1490+
dumpLevel (Level (Merging mp _nd _nc (MergingRun mt _ ref)) rs) = do
14351491
mrs <- readSTRef ref
14361492
return (Just (mp, mt, mrs), rs)
14371493

prototypes/ScheduledMergesTest.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ test_regression_empty_run =
7373
]
7474

7575
-- finish merge
76-
LSM.supplyMergeCredits lsm 16
76+
LSM.supplyMergeCredits lsm (NominalCredit 16)
7777

7878
expectShape lsm
7979
0
@@ -143,7 +143,7 @@ test_merge_again_with_incoming =
143143
]
144144

145145
-- complete the merge (20 entries, but credits get scaled up by 1.25)
146-
LSM.supplyMergeCredits lsm 16
146+
LSM.supplyMergeCredits lsm (NominalCredit 16)
147147

148148
expectShape lsm
149149
0
@@ -272,7 +272,7 @@ fromP (PMergingRun m) = PreExistingMergingRun <$> fromM m
272272
fromM :: IsMergeType t => M t -> ST s (MergingRun t s)
273273
fromM m = do
274274
let (mergeType, mergeDebt, state) = case m of
275-
MCompleted mt md r -> (mt, md, CompletedMerge r)
275+
MCompleted mt md r -> (mt, md, CompletedMerge r)
276276
MOngoing mt md mc rs -> (mt, md, OngoingMerge mc rs' (mergek mt rs'))
277277
where rs' = map getNonEmptyRun rs
278278
MergingRun mergeType mergeDebt <$> newSTRef state

0 commit comments

Comments
 (0)