From b3ec628e72ba929ec8b20ce38185a9c99cdda914 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 13 Feb 2025 11:32:11 +0000 Subject: [PATCH 1/5] PR 554 follow-up: update a few comments Typos and stale / inaccurate comments. --- src/Database/LSMTree/Internal/MergingRun.hs | 5 +++-- src/Database/LSMTree/Internal/Snapshot.hs | 6 ++---- test/Test/Database/LSMTree/Internal/MergingRun.hs | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index b9049e497..5a43ced23 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -267,7 +267,7 @@ work to do). The implementation is similar but somewhat more complex. We also accumulate unspent credits until they reach a threshold at which point we do a batch of merging work. Unlike the prototype, the implementation tracks both credits -spent credits as yet unspent. We will elaborate on why and how below. +spent and credits as yet unspent. We will elaborate on why and how below. In the prototype, the credits spent equals the merge steps performed. The same holds in the real implementation, but making it so is more complicated. @@ -296,7 +296,8 @@ Thus we track two things: * credits unspent ('UnspentCredits'): credits supplied that are not yet spent and are thus available to spend. -The credits supplied is the sum of the credits spent and unspent. +The credits supplied is the sum of the credits spent and unspent. We guarantee +that the supplied credits never exceeds the total debt. The credits spent and the steps performed (or in the process of being performed) will typically be equal. They are not guaranteed to be equal in the diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 93674b77a..72033e9f0 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -194,10 +194,8 @@ toSnapIncomingRun :: -> m (SnapIncomingRun (Ref (Run m h))) toSnapIncomingRun (Single r) = pure (SnapSingleRun r) toSnapIncomingRun (Merging mergePolicy mergingRun) = do - -- We need to know how many credits were spend and yet unspent so we can - -- restore merge work on snapshot load. No need to snapshot the contents - -- of totalStepsVar here, since we still start counting from 0 again when - -- loading the snapshot. + -- We need to know how many credits were supplied so we can restore merge + -- work on snapshot load. (mergingRunState, MR.SuppliedCredits (MR.Credits suppliedCredits), mergeNumRuns, diff --git a/test/Test/Database/LSMTree/Internal/MergingRun.hs b/test/Test/Database/LSMTree/Internal/MergingRun.hs index 52e3759ef..2f91c2994 100644 --- a/test/Test/Database/LSMTree/Internal/MergingRun.hs +++ b/test/Test/Database/LSMTree/Internal/MergingRun.hs @@ -13,7 +13,7 @@ tests = testGroup "Test.Database.LSMTree.Internal.MergingRun" ] -- | The representation of CreditsPair should round trip properly. This is --- non-trivial because it uses a packed bit the representation. +-- non-trivial because it uses a packed bitfield representation. -- prop_CreditsPair :: SpentCredits -> UnspentCredits -> Property prop_CreditsPair spentCredits unspentCredits = From 2a9cd7c89cdaa650c840522a2ffb9dc27165bbc4 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 13 Feb 2025 11:36:06 +0000 Subject: [PATCH 2/5] PR 554 follow-up: reduce newtype conversion for CreditThreshold by making it a newtype for UnspentCredits rather than Credits, so when unwrapping one layer its still compatible with other credit values and we can perform operations. --- src/Database/LSMTree/Internal/MergeSchedule.hs | 2 +- src/Database/LSMTree/Internal/MergingRun.hs | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index 22c358458..a45d36e56 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -915,4 +915,4 @@ scaleCreditsForMerge LevelTiering mr (Credits c) = creditThresholdForLevel :: TableConfig -> LevelNo -> MR.CreditThreshold creditThresholdForLevel conf (LevelNo _i) = let AllocNumEntries (NumEntries x) = confWriteBufferAlloc conf - in MR.CreditThreshold (MR.Credits x) + in MR.CreditThreshold (MR.UnspentCredits (MR.Credits x)) diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index 5a43ced23..4fa393b48 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -331,7 +331,7 @@ numEntriesToTotalDebt (NumEntries n) = Credits n -- Note that ideally the batch size for different LSM levels should be -- co-prime so that merge work at different levels is not synchronised. -- -newtype CreditThreshold = CreditThreshold Credits +newtype CreditThreshold = CreditThreshold UnspentCredits -- | The supplied credits is simply the sum of all the credits that have been -- (successfully) supplied to a merging run via 'supplyCredits'. @@ -560,8 +560,8 @@ atomicDepositAndSpendCredits (CreditsVar !var) !totalDebt -- 2. not case 1, but enough unspent credits have accumulated to do -- a batch of merge work; - | (\(UnspentCredits x)->x) unspent' >= batchThreshold - = spendBatchCredits spent unspent' + | unspent' >= batchThreshold + = spendBatchCredits spent unspent' batchThreshold -- 3. not case 1 or 2, not enough credits to do any merge work. | otherwise @@ -588,14 +588,15 @@ atomicDepositAndSpendCredits (CreditsVar !var) !totalDebt assert (leftover >= 0) $ (supplied', UnspentCredits unspent', leftover) - spendBatchCredits (SpentCredits !spent) (UnspentCredits !unspent) = + spendBatchCredits (SpentCredits !spent) (UnspentCredits !unspent) + (UnspentCredits unspentBatchThreshold) = -- numBatches may be zero, in which case the result will be zero - let !nBatches = unspent `div` batchThreshold - !spend = nBatches * batchThreshold + let !nBatches = unspent `div` unspentBatchThreshold + !spend = nBatches * unspentBatchThreshold !spent' = spent + spend !unspent' = unspent - spend in assert (spend >= 0) $ - assert (unspent' < batchThreshold) $ + assert (unspent' < unspentBatchThreshold) $ assert (spent' + unspent' == spent + unspent) $ (spend, SpentCredits spent', UnspentCredits unspent') From f2d5bde2671f6ad12798f42b1d4c8d58fd1c37a9 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 13 Feb 2025 11:40:16 +0000 Subject: [PATCH 3/5] PR 554 follow-up: remove a done TODO and update a remaining TODO We do now check that we do not do too much merging work. But we could still supply excess credits leading to unexpected leftover credits. Add TODOs in the right places about asserting there are no leftover credits. We can't assert this yet because there are in fact leftovers because we do not spread out merging work fully, in the case that runs are less than worst case in size. --- src/Database/LSMTree/Internal/MergeSchedule.hs | 6 +++++- src/Database/LSMTree/Internal/MergingRun.hs | 4 ---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index a45d36e56..fc32cf817 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -746,7 +746,8 @@ addRunToLevels tr conf@TableConfig{..} resolve hfs hbio root uc r0 reg levels ul OneShot -> do let !required = MR.Credits (unNumEntries (V.foldMap' Run.size rs)) let !thresh = creditThresholdForLevel conf ln - _leftoverCredits <- MR.supplyCredits mr thresh required + leftoverCredits <- MR.supplyCredits mr thresh required + assert (leftoverCredits == 0) $ return () -- This ensures the merge is really completed. However, we don't -- release the merge yet and only briefly inspect the resulting run. bracket (MR.expectCompleted mr) releaseRef $ \r -> @@ -878,6 +879,9 @@ supplyCredits conf c levels = let !c' = scaleCreditsForMerge mp mr c let !thresh = creditThresholdForLevel conf ln _leftoverCredits <- MR.supplyCredits mr thresh c' + --TODO: assert leftoverCredits == 0 + -- to assert that we did not finished the merge too early, + -- and thus have spread the work out evenly. return () -- | Scale a number of credits to a number of merge steps to be performed, based diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index 4fa393b48..ed6931287 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -776,10 +776,6 @@ expectCompleted (DeRef MergingRun {..}) = do -- between performMergeSteps and completeMerge here, and above. weFinishedMerge <- performMergeSteps mergeState mergeCreditsVar credits when weFinishedMerge $ completeMerge mergeState mergeKnownCompleted - -- TODO: can we think of a check to see if we did not do too much work - -- here? <-- assert (suppliedCredits == totalDebt) ought to do it! - -- A related question is if we finished the merge too early, could have - -- spread out the work better. withMVar mergeState $ \case CompletedMerge r -> dupRef r -- return a fresh reference to the run OngoingMerge{} -> do From 7b0d85a869e9dcb4ab764f429d360722abc5fc93 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 13 Feb 2025 11:43:30 +0000 Subject: [PATCH 4/5] PR 554 follow-up: add TODOs about Run refs during snapshotting These are not concurrency safe. The merging runs could complete during snapshot persistence leading to use after free. Add TODOs to explain this and to suggest a solution. --- src/Database/LSMTree/Internal/MergingRun.hs | 8 ++++++++ src/Database/LSMTree/Internal/Snapshot.hs | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index ed6931287..946495424 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -231,6 +231,14 @@ duplicateRuns (DeRef mr) = V.mapM (\r -> withRollback reg (dupRef r) releaseRef) rs -- | Take a snapshot of the state of a merging run. +-- +-- TODO: this is not concurrency safe! The inputs runs to the merging run could +-- be released concurrently by another thread that completes the merge, while +-- the snapshot is taking place. The solution is for snapshot here to duplicate +-- the runs it returns _while_ holding the mergeState MVar (to exclude threads +-- that might concurrently complete the merge). And then the caller of course +-- must be updated to release the extra references. +-- snapshot :: (PrimMonad m, MonadMVar m) => Ref (MergingRun m h) diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 72033e9f0..9f0c6b874 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -171,6 +171,12 @@ instance NFData r => NFData (SnapMergingRunState r) where Conversion to levels snapshot format -------------------------------------------------------------------------------} +--TODO: probably generally all the Ref (Run _) here ought to be fresh +-- references, created as we snapshot the levels, so that the runs don't +-- disappear under our feet during the process of making the snapshot durable. +-- At minimum the volatile runs are the inputs to merging runs, but it may be +-- simpler to duplicate them all, and release them all at the end. + {-# SPECIALISE toSnapLevels :: Levels IO h -> IO (SnapLevels (Ref (Run IO h))) #-} toSnapLevels :: (PrimMonad m, MonadMVar m) @@ -200,6 +206,8 @@ toSnapIncomingRun (Merging mergePolicy mergingRun) = do MR.SuppliedCredits (MR.Credits suppliedCredits), mergeNumRuns, mergeNumEntries) <- MR.snapshot mergingRun + -- TODO: MR.snapshot needs to return duplicated run references, and we + -- need to arrange to release them when the snapshoting is done. let smrs = toSnapMergingRunState mergingRunState pure $ SnapMergingRun From b22f64cb1a9be64b7b73b9584b1ea34c6382594b Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 13 Feb 2025 11:45:59 +0000 Subject: [PATCH 5/5] PR 554 follow-up: update some TODOs about exception safety And remove a stale comment about non-zero cases and replace it by an assertion. --- src/Database/LSMTree/Internal/MergingRun.hs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Database/LSMTree/Internal/MergingRun.hs b/src/Database/LSMTree/Internal/MergingRun.hs index 946495424..32c0889ec 100644 --- a/src/Database/LSMTree/Internal/MergingRun.hs +++ b/src/Database/LSMTree/Internal/MergingRun.hs @@ -712,11 +712,10 @@ performMergeSteps :: -> Credits -> m Bool performMergeSteps mergeVar creditsVar (Credits credits) = + assert (credits >= 0) $ withMVar mergeVar $ \case CompletedMerge{} -> pure False OngoingMerge _rs m -> do - -- We have dealt with the case of credits <= 0 above, - -- so here we know credits is positive let stepsToDo = credits (stepsDone, stepResult) <- Merge.steps m stepsToDo assert (stepResult == MergeDone || stepsDone >= stepsToDo) (pure ()) @@ -753,8 +752,9 @@ completeMerge mergeVar mergeKnownCompletedVar = do (OngoingMerge rs m) -> do -- first try to complete the merge before performing other side effects, -- in case the completion fails - --TODO: Run.fromMutable claims not to be exception safe - -- may need to use uninteruptible mask + --TODO: Run.fromMutable (used in Merge.complete) claims not to be + -- exception safe so we should probably be using the resource registry + -- and test for exception safety. r <- Merge.complete m V.forM_ rs releaseRef -- Cache the knowledge that we completed the merge @@ -778,11 +778,13 @@ expectCompleted (DeRef MergingRun {..}) = do let totalDebt = numEntriesToTotalDebt mergeNumEntries suppliedCredits = spentCredits + unspentCredits !credits = assert (suppliedCredits == totalDebt) $ + assert (unspentCredits >= 0) $ unspentCredits - --TODO: what about exception safety: check if it is ok to be interrupted - -- between performMergeSteps and completeMerge here, and above. weFinishedMerge <- performMergeSteps mergeState mergeCreditsVar credits + -- If an async exception happens before we get to perform the + -- completion, then that is fine. The next 'expectCompleted' will + -- complete the merge. when weFinishedMerge $ completeMerge mergeState mergeKnownCompleted withMVar mergeState $ \case CompletedMerge r -> dupRef r -- return a fresh reference to the run