diff --git a/fuzzing/corpus/corpus.go b/fuzzing/corpus/corpus.go index d9c18236..58f4aaed 100644 --- a/fuzzing/corpus/corpus.go +++ b/fuzzing/corpus/corpus.go @@ -47,6 +47,11 @@ type Corpus struct { // are executed to check for test failures. unexecutedCallSequences []calls.CallSequence + // unreplayedSequenceIndex is the position of the next sequence NextUnreplayedSequence hands out; it counts through the test result sequence files first, then the call sequence files. + unreplayedSequenceIndex int + // totalSequencesForReplay is the number of sequences available for replay: the test result sequence file count plus the call sequence file count. + totalSequencesForReplay int + // mutationTargetSequenceChooser is a provider that allows for weighted random selection of callSequences. If a // call sequence was not found to be compatible with this run, it is not added to the chooser. mutationTargetSequenceChooser *randomutils.WeightedRandomChooser[calls.CallSequence] @@ -69,6 +74,8 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) { callSequenceFiles: newCorpusDirectory[calls.CallSequence](""), testResultSequenceFiles: newCorpusDirectory[calls.CallSequence](""), unexecutedCallSequences: make([]calls.CallSequence, 0), + unreplayedSequenceIndex: 0, + totalSequencesForReplay: 0, logger: logging.GlobalLogger.NewSubLogger("module", "corpus"), } @@ -95,6 +102,9 @@ func NewCorpus(corpusDirectory string) (*Corpus, error) { if err != nil { return nil, err } + + // Count the sequences available for replay: test result sequence files plus call sequence files (replayed in that order) + corpus.totalSequencesForReplay = len(corpus.testResultSequenceFiles.files) + len(corpus.callSequenceFiles.files) } return corpus, nil @@ -304,8 +314,9 @@ func (c *Corpus) initializeSequences(sequenceFiles *corpusDirectory[calls.CallSe return nil } -// Initialize initializes any runtime data needed for a Corpus on startup. Call sequences are replayed on the post-setup -// (deployment) test chain to calculate coverage, while resolving references to compiled contracts. +// Initialize initializes any runtime data needed for a Corpus on startup. 
Call sequences are loaded from disk +// but not executed - execution is handled by individual workers using NextUnreplayedSequence() for parallel +// corpus replay with test providers. // Returns the active number of corpus items, total number of corpus items, or an error if one occurred. If an error // is returned, then the corpus counts returned will always be zero. func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions contracts.Contracts) (int, int, error) { @@ -374,26 +385,18 @@ func (c *Corpus) Initialize(baseTestChain *chain.TestChain, contractDefinitions } } - // Next we replay every call sequence, checking its validity on this chain and measuring coverage. Valid sequences - // are added to the corpus for mutations, re-execution, etc. - // - // The order of initializations here is important, as it determines the order of "unexecuted sequences" to replay - // when the fuzzer's worker starts up. We want to replay test results first, so that other corpus items - // do not trigger the same test failures instead. - err = c.initializeSequences(c.testResultSequenceFiles, testChain, deployedContracts, false) - if err != nil { - return 0, 0, err - } - - err = c.initializeSequences(c.callSequenceFiles, testChain, deployedContracts, true) - if err != nil { - return 0, 0, err - } + // Note: Corpus replay (sequence execution and validation) is now handled by individual workers + // using NextUnreplayedSequence() for parallel execution with test providers. + // The old initializeSequences execution logic is replaced by this worker-based approach. NOTE(review): with those calls removed, nothing in this hunk adds to c.unexecutedCallSequences, so the active count computed below looks like it will always be zero - confirm callers expect that.
// Calculate corpus health metrics corpusSequencesTotal := len(c.callSequenceFiles.files) + len(c.testResultSequenceFiles.files) corpusSequencesActive := len(c.unexecutedCallSequences) + // Set the replay total to the file count above and restart the cursor read by NextUnreplayedSequence + c.totalSequencesForReplay = corpusSequencesTotal + c.unreplayedSequenceIndex = 0 + return corpusSequencesActive, corpusSequencesTotal, nil } @@ -506,6 +509,57 @@ func (c *Corpus) CheckSequenceCoverageAndUpdate(callSequence calls.CallSequence, return nil } +// RegisterReplayedSequence registers a successfully replayed sequence with the corpus for use in mutations. +// This should be called by workers after successfully executing and validating a corpus sequence. +// The useInMutations parameter indicates whether the sequence should be available for mutation-based fuzzing; when it is false, or the chooser is nil, nothing is recorded. The returned error is always nil. +func (c *Corpus) RegisterReplayedSequence(sequence calls.CallSequence, useInMutations bool) error { + // Acquire our call sequences lock during the duration of this method. + c.callSequencesLock.Lock() + defer c.callSequencesLock.Unlock() + + // Add to the mutation target chooser, with a weight of 1, if requested and the chooser exists + if useInMutations && c.mutationTargetSequenceChooser != nil { + c.mutationTargetSequenceChooser.AddChoices(randomutils.NewWeightedRandomChoice[calls.CallSequence](sequence, big.NewInt(1))) + } + + return nil +} + +// NextUnreplayedSequence returns the next call sequence from loaded files for parallel corpus replay. +// It hands out test result sequences first, then call sequences, while holding the call sequences lock. +// Returns the next sequence, a boolean indicating whether a sequence was returned (false once all have been handed out), and a boolean +// indicating if this sequence should be used in mutations (false for test results, true for regular sequences). +// This method is intended for parallel corpus replay across multiple workers. 
+func (c *Corpus) NextUnreplayedSequence() (*calls.CallSequence, bool, bool) { + // Acquire our call sequences lock during the duration of this method. + c.callSequencesLock.Lock() + defer c.callSequencesLock.Unlock() + + // Every sequence has already been handed out; report that none was returned + if c.unreplayedSequenceIndex >= c.totalSequencesForReplay { + return nil, false, false + } + + // Indices below the test result count select a test result sequence (useInMutations = false) + testResultCount := len(c.testResultSequenceFiles.files) + if c.unreplayedSequenceIndex < testResultCount { + sequenceFileData := c.testResultSequenceFiles.files[c.unreplayedSequenceIndex] + c.unreplayedSequenceIndex++ + return &sequenceFileData.data, true, false // Test results should NOT be used in mutations + } + + // The remaining indices, offset by the test result count, select a regular call sequence file (useInMutations = true) + callSequenceIndex := c.unreplayedSequenceIndex - testResultCount + if callSequenceIndex < len(c.callSequenceFiles.files) { + sequenceFileData := c.callSequenceFiles.files[callSequenceIndex] + c.unreplayedSequenceIndex++ + return &sequenceFileData.data, true, true // Regular sequences should be used in mutations + } + + // Only reachable if the file lists hold fewer entries than totalSequencesForReplay; report that none was returned + return nil, false, false +} + // UnexecutedCallSequence returns a call sequence loaded from disk which has not yet been returned by this method. // It is intended to be used by the fuzzer to run all un-executed call sequences (without mutations) to check for test // failures. If a call sequence is returned, it will not be returned by this method again. diff --git a/fuzzing/fuzzer_worker.go b/fuzzing/fuzzer_worker.go index 2ee66b8b..0246a62a 100644 --- a/fuzzing/fuzzer_worker.go +++ b/fuzzing/fuzzer_worker.go @@ -340,13 +340,24 @@ func (fw *FuzzerWorker) testNextCallSequence() ([]ShrinkCallSequenceRequest, err } // Execute our call sequence. 
- _, err = calls.ExecuteCallSequenceIteratively(fw.chain, fetchElementFunc, executionCheckFunc) + executedSequence, err := calls.ExecuteCallSequenceIteratively(fw.chain, fetchElementFunc, executionCheckFunc) // If we encountered an error, report it. if err != nil { return nil, err } + // If this was a corpus replay and at least one call executed, register what ran with the corpus. NOTE(review): a non-empty result may be only a prefix if PopSequenceElement ended the replay early on a validation failure - confirm that registering a prefix is intended. + if fw.sequenceGenerator.isCorpusReplay && len(executedSequence) > 0 { + // Use the mutation flag determined when the sequence was fetched from corpus + useInMutations := fw.sequenceGenerator.corpusSequenceUseInMutations + err = fw.fuzzer.corpus.RegisterReplayedSequence(executedSequence, useInMutations) + if err != nil { + // Log error but don't fail - this is not critical to execution + fw.fuzzer.logger.Debug("Failed to register replayed corpus sequence: ", err) + } + } + // If our fuzzer context is done, exit out immediately without results. if utils.CheckContextDone(fw.fuzzer.ctx) { return nil, nil diff --git a/fuzzing/fuzzer_worker_sequence_generator.go b/fuzzing/fuzzer_worker_sequence_generator.go index 1888e418..3ed0c962 100644 --- a/fuzzing/fuzzer_worker_sequence_generator.go +++ b/fuzzing/fuzzer_worker_sequence_generator.go @@ -34,6 +34,14 @@ type CallSequenceGenerator struct { // to its fetching by PopSequenceElement. prefetchModifyCallFunc PrefetchModifyCallFunc + // isCorpusReplay indicates whether we're currently replaying a corpus sequence (which PopSequenceElement validates) + // vs. doing normal fuzzing with generated sequences. + isCorpusReplay bool + + // corpusSequenceUseInMutations indicates whether the current corpus sequence should be used in mutations + // (false for test result sequences, true for regular corpus sequences) + corpusSequenceUseInMutations bool + // mutationStrategyChooser is a weighted random selector of functions that prepare the CallSequenceGenerator with // a baseSequence derived from corpus entries. 
mutationStrategyChooser *randomutils.WeightedRandomChooser[CallSequenceGeneratorMutationStrategy] @@ -194,13 +202,19 @@ func (g *CallSequenceGenerator) InitializeNextSequence() (bool, error) { g.fetchIndex = 0 g.prefetchModifyCallFunc = nil - // Check if there are any previously un-executed corpus call sequences. If there are, the fuzzer should execute - // those first. - unexecutedSequence := g.worker.fuzzer.corpus.UnexecutedCallSequence() - if unexecutedSequence != nil { - g.baseSequence = *unexecutedSequence + // Check if there are any corpus sequences to replay for parallel corpus replay with test providers. + // This replaces the old UnexecutedCallSequence mechanism. Despite its name, hasMore is true when a sequence was handed out. + unreplayedSequence, hasMore, useInMutations := g.worker.fuzzer.corpus.NextUnreplayedSequence() + if hasMore && unreplayedSequence != nil { + g.baseSequence = *unreplayedSequence + // Mark this as corpus replay for validation logic + g.isCorpusReplay = true + g.corpusSequenceUseInMutations = useInMutations return false, nil } + // Clear the corpus replay flags for normal fuzzing + g.isCorpusReplay = false + g.corpusSequenceUseInMutations = false // We'll decide whether to create a new call sequence or mutating existing corpus call sequences. Any entries we // leave as nil will be populated by a newly generated call prior to being fetched from this provider. @@ -262,6 +276,32 @@ func (g *CallSequenceGenerator) PopSequenceElement() (*calls.CallSequenceElement } } + // If we're replaying a corpus sequence, perform validation (contract resolution, ABI resolution) + if g.isCorpusReplay && element != nil { + // Validate contract resolution for corpus sequences (similar to original initializeSequences logic) + if element.Call.To != nil { + // We are calling a contract with this call, ensure we can resolve the contract the call is targeting. 
+ resolvedContract, resolvedContractExists := g.worker.deployedContracts[*element.Call.To] + if !resolvedContractExists { + // Log the failure and end this sequence early; nothing here records the sequence as invalid + g.worker.fuzzer.corpus.logger.Debug("Corpus sequence disabled due to unresolvable contract at address: ", element.Call.To.String()) + return nil, nil // Return nil to indicate end of this sequence, worker will get next sequence + } + element.Contract = resolvedContract + + // Resolve the call's ABI values against the resolved contract's ABI + callAbiValues := element.Call.DataAbiValues + if callAbiValues != nil { + err := callAbiValues.Resolve(element.Contract.CompiledContract().Abi) + if err != nil { + // Log the failure and end this sequence early; nothing here records the sequence as invalid + g.worker.fuzzer.corpus.logger.Debug("Corpus sequence disabled due to ABI resolution error in contract '", element.Contract.Name(), "': ", err) + return nil, nil // Return nil to indicate end of this sequence, worker will get next sequence + } + } + } + } + // Update the element with the current nonce for the associated chain. element.Call.FillFromTestChainProperties(g.worker.chain)