diff --git a/src/crates/heuristics/src/ast/change.rs b/src/crates/heuristics/src/ast/change.rs index 1a5366e..fba8f05 100644 --- a/src/crates/heuristics/src/ast/change.rs +++ b/src/crates/heuristics/src/ast/change.rs @@ -9,11 +9,12 @@ use tx_indexer_pipeline::{ }; use tx_indexer_primitives::{ AbstractTxIn, + handle::SpendableTxConstituent, unified::{AnyOutId, AnyTxId}, }; use crate::change_identification::{ - NLockTimeChangeIdentification, NaiveChangeIdentificationHueristic, TxOutChangeAnnotation, + NLockTimeChangeIdentification, NaiveChangeIdentificationHeuristic, TxOutChangeAnnotation, }; /// Node that identifies change outputs in transactions. @@ -44,10 +45,13 @@ impl Node for ChangeIdentificationNode { for output_id in txouts.iter() { let output = output_id.with(ctx.unified_storage()); - let is_change = matches!( - NaiveChangeIdentificationHueristic::is_change(output), - TxOutChangeAnnotation::Change - ); + let is_change = match SpendableTxConstituent::try_new(output) { + Ok(spendable) => matches!( + NaiveChangeIdentificationHeuristic::is_change(spendable), + TxOutChangeAnnotation::Change + ), + Err(_) => false, // OP_RETURN outputs cannot be change + }; result.insert(*output_id, is_change); } @@ -114,10 +118,13 @@ impl Node for FingerPrintChangeIdentificationNode { let is_change = match output.spender_txin() { Some(spending_txin) => { let spending_tx = spending_txin.containing_tx(); - matches!( - NLockTimeChangeIdentification::is_change(output, spending_tx), - TxOutChangeAnnotation::Change - ) + match SpendableTxConstituent::try_new(output) { + Ok(spendable) => matches!( + NLockTimeChangeIdentification::is_change(spendable, spending_tx), + TxOutChangeAnnotation::Change + ), + Err(_) => false, // OP_RETURN outputs cannot be change + } } None => false, // Unspent output: not change by fingerprint }; diff --git a/src/crates/heuristics/src/ast/uih.rs b/src/crates/heuristics/src/ast/uih.rs index 8a000f7..34659eb 100644 --- a/src/crates/heuristics/src/ast/uih.rs +++ b/src/crates/heuristics/src/ast/uih.rs @@ -6,13 +6,17 @@ use std::collections::HashSet; +use bitcoin::Amount; use tx_indexer_pipeline::{ engine::EvalContext, expr::Expr, node::{Node, NodeId}, value::{TxMask, TxOutSet, TxSet}, }; -use tx_indexer_primitives::unified::{AnyOutId, AnyTxId}; +use tx_indexer_primitives::{ + traits::abstract_types::{EnumerateInputValueInArbitraryOrder, HasScriptPubkey}, + unified::{AnyOutId, AnyTxId}, +}; use crate::uih::UnnecessaryInputHeuristic; @@ -45,12 +49,20 @@ impl Node for UnnecessaryInputHeuristic1Node { for tx_id in &tx_ids { let tx = tx_id.with(ctx.unified_storage()); - let outputs: Vec<_> = tx.outputs().map(|o| (o.id(), o.value())).collect(); - if outputs.is_empty() { + let outputs: Vec<_> = tx + .outputs() + .filter(|o| o.is_spendable()) + .map(|o| (o.id(), o.value())) + .collect(); + let Some(min_out) = outputs.iter().map(|(_, v)| *v).min() else { continue; - } + }; + let Some(min_in) = tx.input_values().min() else { + continue; + }; - if let Some(min_out) = UnnecessaryInputHeuristic::uih1_min_output_value(&tx) { + if let Some(min_out) = UnnecessaryInputHeuristic::uih1_min_output_value(min_in, min_out) + { for (out_id, v) in &outputs { if *v == min_out { result.insert(*out_id); @@ -108,7 +120,24 @@ impl Node for UnnecessaryInputHeuristic2Node { for tx_id in &tx_ids { let tx = tx_id.with(ctx.unified_storage()); - result.insert(*tx_id, UnnecessaryInputHeuristic::is_uih2(&tx)); + let input_values: Vec = tx.input_values().collect(); + let output_values: Vec = tx + .outputs() + .filter(|o| o.is_spendable()) + .map(|o| o.value()) + .collect(); + + let flagged = if input_values.len() < 2 || output_values.is_empty() { + false + } else { + let sum_in = input_values.iter().copied().sum(); + let min_in = input_values.iter().copied().min().expect("len >= 2"); + let sum_out = output_values.iter().copied().sum(); + let min_out = output_values.iter().copied().min().expect("non-empty"); + UnnecessaryInputHeuristic::is_uih2(sum_in, min_in, sum_out, min_out) + }; + + result.insert(*tx_id, flagged); } result @@ -139,7 +168,7 @@ mod tests { use tx_indexer_primitives::{ UnifiedStorage, loose::{LooseIndexBuilder, TxId, TxOutId}, - test_utils::DummyTxData, + test_utils::{DummyTxData, DummyTxOutData}, traits::abstract_types::AbstractTransaction, unified::{AnyOutId, AnyTxId}, }; @@ -200,6 +229,22 @@ mod tests { ] } + fn setup_uih1_op_return_value_zero_fixture() -> Vec> + { + vec![ + Arc::new(DummyTxData::new_with_amounts(vec![100])), + Arc::new(DummyTxData::new_with_amounts(vec![200])), + Arc::new(DummyTxData::new( + vec![ + DummyTxOutData::new(150, 0), + DummyTxOutData::new_with_script(0, 1, vec![0x6a, 0x04, 0xde, 0xad, 0xbe, 0xef]), + ], + vec![TxOutId::new(TxId(1), 0), TxOutId::new(TxId(2), 0)], + 0, + )), + ] + } + fn setup_uih2_no_unnecessary_fixture() -> Vec> { vec![ Arc::new(DummyTxData::new_with_amounts(vec![100])), @@ -273,6 +318,22 @@ mod tests { ); } + #[test] + fn test_uih1_ignores_op_return() { + let all_txs = setup_uih1_op_return_value_zero_fixture(); + let ctx = Arc::new(PipelineContext::new()); + let mut engine = engine_with_loose(ctx.clone(), all_txs); + + let source = AllLooseTxs::new(&ctx); + let uih1 = UnnecessaryInputHeuristic1::new(source.txs()); + let result = engine.eval(&uih1); + + assert!( + result.is_empty(), + "UIH1 must not classify an OP_RETURN value=0 as a change candidate" + ); + } + #[test] fn test_uih1_tie_smallest_outputs() { let all_txs = setup_uih1_tie_fixture(); diff --git a/src/crates/heuristics/src/change_identification.rs b/src/crates/heuristics/src/change_identification.rs index 5b62333..5e44b44 100644 --- a/src/crates/heuristics/src/change_identification.rs +++ b/src/crates/heuristics/src/change_identification.rs @@ -1,5 +1,5 @@ use tx_indexer_primitives::{ - handle::TxHandle, + handle::{SpendableTxConstituent, TxHandle}, traits::abstract_types::{HasNLockTime, HasScriptPubkey, OutputCount, TxConstituent}, }; @@ -9,11 +9,15 @@ pub enum TxOutChangeAnnotation { NotChange, } -pub struct NaiveChangeIdentificationHueristic; +pub struct NaiveChangeIdentificationHeuristic; -impl NaiveChangeIdentificationHueristic { +impl NaiveChangeIdentificationHeuristic { /// Check if a txout is change based on its containing transaction. - pub fn is_change(txout: impl TxConstituent) -> TxOutChangeAnnotation { + /// Accepts only spendable outputs; the type system rules OP_RETURN out. + pub fn is_change(txout: SpendableTxConstituent) -> TxOutChangeAnnotation + where + T: TxConstituent, + { let tx = txout.containing_tx(); if tx.output_count() > 0 && txout.vout() == tx.output_count() - 1 { TxOutChangeAnnotation::Change @@ -26,10 +30,15 @@ impl NaiveChangeIdentificationHueristic { pub struct NLockTimeChangeIdentification; impl NLockTimeChangeIdentification { - pub fn is_change( - tx_out: impl TxConstituent, + /// Check if a txout is change based on nLockTime comparison. + /// Accepts only spendable outputs; the type system rules OP_RETURN out. + pub fn is_change( + tx_out: SpendableTxConstituent, spending_tx: impl HasNLockTime, - ) -> TxOutChangeAnnotation { + ) -> TxOutChangeAnnotation + where + T: TxConstituent, + { let containing_tx_n_locktime = tx_out.containing_tx().n_locktime(); let child_tx_n_locktime = spending_tx.n_locktime(); if containing_tx_n_locktime == 0 && child_tx_n_locktime == 0 { @@ -55,9 +64,12 @@ impl ScriptTypesMatchingChangeIdentification { /// This applies the address-type heuristic conservatively: mixed input /// types, unresolved prevouts, or multiple matching outputs are all treated /// as inconclusive and return `NotChange`. - pub fn is_change<'a>( - tx_out: impl TxConstituent>, - ) -> TxOutChangeAnnotation { + /// + /// Accepts only spendable outputs; the type system rules OP_RETURN out. + pub fn is_change<'a, T>(tx_out: SpendableTxConstituent) -> TxOutChangeAnnotation + where + T: TxConstituent>, + { let tx = tx_out.containing_tx(); let mut input_types = tx.inputs().map(|input| input.output_type()); @@ -74,13 +86,18 @@ impl ScriptTypesMatchingChangeIdentification { return TxOutChangeAnnotation::NotChange; } - let matching_outputs: Vec = tx + let matching_indices: Vec = tx .outputs() .enumerate() .filter_map(|(index, output)| (output.output_type() == input_type).then_some(index)) .collect(); - if matching_outputs.len() == 1 && matching_outputs[0] == tx_out.vout() { + // There must be exactly one matching output for it to be considered change + if matching_indices.len() != 1 { + return TxOutChangeAnnotation::NotChange; + } + + if matching_indices[0] == tx_out.vout() { TxOutChangeAnnotation::Change } else { TxOutChangeAnnotation::NotChange @@ -96,7 +113,7 @@ mod tests { UnifiedStorage, loose::LooseIndexBuilder, loose::{TxId, TxOutId}, - test_utils::{DummyTxData, DummyTxOut, DummyTxOutData}, + test_utils::{DUMMY_UNSPENDABLE_SCRIPT, DummyTxData, DummyTxOut, DummyTxOutData}, unified::AnyOutId, }; @@ -125,8 +142,9 @@ mod tests { vout: 0, containing_tx: DummyTxData::new_with_amounts(vec![100]), }; + let spendable: SpendableTxConstituent<_> = txout.try_into().unwrap(); assert_eq!( - NaiveChangeIdentificationHueristic::is_change(txout), + NaiveChangeIdentificationHeuristic::is_change(spendable), TxOutChangeAnnotation::Change ); } @@ -138,8 +156,9 @@ mod tests { containing_tx: DummyTxData::new_with_amounts(vec![100]), }; let spending_tx = DummyTxData::new_with_amounts(vec![100]); + let spendable: SpendableTxConstituent<_> = tx_out.try_into().unwrap(); assert_eq!( - NLockTimeChangeIdentification::is_change(tx_out, spending_tx), + NLockTimeChangeIdentification::is_change(spendable, spending_tx), TxOutChangeAnnotation::NotChange ); @@ -149,8 +168,9 @@ mod tests { containing_tx: DummyTxData::new(vec![DummyTxOutData::new(100, 0)], vec![], 1), }; let spending_tx = DummyTxData::new(vec![DummyTxOutData::new(100, 0)], vec![], 1); + let spendable: SpendableTxConstituent<_> = tx_out.try_into().unwrap(); assert_eq!( - NLockTimeChangeIdentification::is_change(tx_out, spending_tx), + NLockTimeChangeIdentification::is_change(spendable, spending_tx), TxOutChangeAnnotation::Change ); } @@ -195,11 +215,11 @@ mod tests { let change = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(payment), + ScriptTypesMatchingChangeIdentification::is_change(payment.try_into().unwrap()), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(change), + ScriptTypesMatchingChangeIdentification::is_change(change.try_into().unwrap()), TxOutChangeAnnotation::Change ); } @@ -244,15 +264,80 @@ mod tests { let change = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(payment), + ScriptTypesMatchingChangeIdentification::is_change(payment.try_into().unwrap()), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(change), + ScriptTypesMatchingChangeIdentification::is_change(change.try_into().unwrap()), TxOutChangeAnnotation::NotChange ); } + #[test] + fn test_script_types_matching_excludes_op_return() { + let storage = storage_from_loose_txs(vec![ + // inputs: P2PKH + DummyTxData::new_with_outputs(vec![DummyTxOutData::new_with_script( + 100, + 0, + script_from_address("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"), + )]), + DummyTxData::new_with_outputs(vec![DummyTxOutData::new_with_script( + 150, + 0, + script_from_address("1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2"), + )]), + DummyTxData::new( + vec![ + // OP_RETURN output - should never be considered change + DummyTxOutData::new_with_script(0, 0, vec![0x6a, 0x04, 0xde, 0xad, 0xbe, 0xef]), + // P2PKH output - the only spendable P2PKH, so it's change + DummyTxOutData::new_with_script( + 249, + 1, + script_from_address("1BoatSLRHtKNngkdXEeobR76b53LETtpyT"), + ), + ], + vec![TxOutId::new(TxId(1), 0), TxOutId::new(TxId(2), 0)], + 0, + ), + ]); + + let op_return_output = AnyOutId::from(TxOutId::new(TxId(3), 0)).with(&storage); + let p2pkh_output = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); + + // OP_RETURN cannot be wrapped in SpendableTxConstituent -- type system rejects it + assert!(TryInto::>::try_into(op_return_output).is_err()); + + // P2PKH output is change since it's the only spendable output matching input type + assert_eq!( + ScriptTypesMatchingChangeIdentification::is_change(p2pkh_output.try_into().unwrap()), + TxOutChangeAnnotation::Change + ); + } + + #[test] + fn test_unspendable_cannot_be_wrapped() { + // The type system enforces unspendable output exclusion at the wrapper boundary, + // so heuristics never have to check for it themselves. + let txout_unspendable = DummyTxOut { + vout: 1, + containing_tx: DummyTxData::new( + vec![ + DummyTxOutData::new_with_script( + 100, + 0, + script_from_address("1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"), + ), + DummyTxOutData::new_with_script(0, 1, DUMMY_UNSPENDABLE_SCRIPT), + ], + vec![], + 0, + ), + }; + assert!(TryInto::>::try_into(txout_unspendable).is_err()); + } + #[test] fn test_script_types_matching_requires_unique_output_match() { let storage = storage_from_loose_txs(vec![ @@ -290,11 +375,11 @@ mod tests { let output1 = AnyOutId::from(TxOutId::new(TxId(3), 1)).with(&storage); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(output0), + ScriptTypesMatchingChangeIdentification::is_change(output0.try_into().unwrap()), TxOutChangeAnnotation::NotChange ); assert_eq!( - ScriptTypesMatchingChangeIdentification::is_change(output1), + ScriptTypesMatchingChangeIdentification::is_change(output1.try_into().unwrap()), TxOutChangeAnnotation::NotChange ); } diff --git a/src/crates/heuristics/src/uih.rs b/src/crates/heuristics/src/uih.rs index 46222a4..ddb60cc 100644 --- a/src/crates/heuristics/src/uih.rs +++ b/src/crates/heuristics/src/uih.rs @@ -1,58 +1,20 @@ use bitcoin::Amount; -use tx_indexer_primitives::traits::abstract_types::{ - EnumerateInputValueInArbitraryOrder, EnumerateOutputValueInArbitraryOrder, -}; pub struct UnnecessaryInputHeuristic; impl UnnecessaryInputHeuristic { - pub fn uih1_min_output_value(tx: &T) -> Option - where - T: EnumerateInputValueInArbitraryOrder + EnumerateOutputValueInArbitraryOrder, - { - let input_values: Vec = tx.input_values().collect(); - let output_values: Vec = tx.output_values().collect(); - - if input_values.is_empty() || output_values.is_empty() { - return None; - } - - let min_in = input_values - .iter() - .min() - .copied() - .expect("non-empty inputs"); - let min_out = output_values - .iter() - .min() - .copied() - .expect("non-empty outputs"); - - if min_out < min_in { - Some(min_out) - } else { - None - } + /// UIH1 (Optimal change): the smallest output is likely change when it is + /// strictly less than the smallest input. Returns `Some(min_out)` in that + /// case. Caller is responsible for excluding unspendable outputs from + /// `min_out` — including them (often value=0) would always trip UIH1. + pub fn uih1_min_output_value(min_in: Amount, min_out: Amount) -> Option { + (min_out < min_in).then_some(min_out) } - pub fn is_uih2(tx: &T) -> bool - where - T: EnumerateInputValueInArbitraryOrder + EnumerateOutputValueInArbitraryOrder, - { - let input_values: Vec = tx.input_values().collect(); - let output_values: Vec = tx.output_values().collect(); - - if input_values.len() < 2 || output_values.is_empty() { - return false; - } - - let sum_in = input_values.iter().fold(Amount::from_sat(0), |a, b| a + *b); - let min_in = input_values.iter().min().copied().expect("len >= 2"); - let sum_out = output_values - .iter() - .fold(Amount::from_sat(0), |a, b| a + *b); - let min_out = output_values.iter().min().copied().expect("non-empty"); - + /// UIH2 (Unnecessary input): the largest output could be paid without the + /// smallest input. Caller is responsible for excluding unspendable outputs + /// from the sum/min and ensuring there are at least two inputs. + pub fn is_uih2(sum_in: Amount, min_in: Amount, sum_out: Amount, min_out: Amount) -> bool { (sum_in - min_in) >= (sum_out - min_out) } } diff --git a/src/crates/primitives/src/handle.rs b/src/crates/primitives/src/handle.rs index 3a90d43..be12efa 100644 --- a/src/crates/primitives/src/handle.rs +++ b/src/crates/primitives/src/handle.rs @@ -65,6 +65,14 @@ pub struct TxOutHandle<'a> { pub(crate) index: &'a dyn IndexedGraph, } +impl<'a> std::fmt::Debug for TxOutHandle<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TxOutHandle") + .field("out_id", &self.out_id) + .finish() + } +} + impl<'a> TxOutHandle<'a> { pub fn id(&self) -> AnyOutId { self.out_id @@ -311,3 +319,43 @@ impl<'a> TxConstituent for TxOutHandle<'a> { self.vout() as usize } } + +/// Wrapper guaranteeing the inner value is a spendable output (not OP_RETURN). +/// +/// Construct via [`SpendableTxConstituent::try_new`]: `Ok` for spendable outputs, +/// `Err` returns the original value back so callers can handle the unspendable +/// case explicitly. +pub struct SpendableTxConstituent(T); + +impl SpendableTxConstituent { + /// Wraps `value` if it is spendable; returns it back as `Err` if unspendable. + pub fn try_new(value: T) -> Result { + if value.is_spendable() { + Ok(Self(value)) + } else { + Err(value) + } + } +} + +impl<'a> TryFrom> for SpendableTxConstituent> { + type Error = TxOutHandle<'a>; + + fn try_from(value: TxOutHandle<'a>) -> Result { + Self::try_new(value) + } +} + +impl SpendableTxConstituent { + pub fn into_inner(self) -> T { + self.0 + } +} + +impl std::ops::Deref for SpendableTxConstituent { + type Target = T; + + fn deref(&self) -> &T { + &self.0 + } +} diff --git a/src/crates/primitives/src/test_utils/mod.rs b/src/crates/primitives/src/test_utils/mod.rs index b8b32cf..2196219 100644 --- a/src/crates/primitives/src/test_utils/mod.rs +++ b/src/crates/primitives/src/test_utils/mod.rs @@ -12,6 +12,10 @@ use crate::{ }, }; +/// Dummy unspendable script for testing - all bits set to 0xFF +/// This is used instead of real OP_RETURN scripts in tests to mark outputs as unspendable +pub const DUMMY_UNSPENDABLE_SCRIPT: &[u8] = &[0xFF, 0xFF, 0xFF, 0xFF]; + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DummyTxData { outputs: Vec, @@ -116,6 +120,11 @@ impl HasScriptPubkey for DummyTxOutData { fn script_pubkey_bytes(&self) -> Vec { self.script_pubkey.clone() } + + fn is_spendable(&self) -> bool { + // Check for our dummy unspendable script or real OP_RETURN + self.script_pubkey != DUMMY_UNSPENDABLE_SCRIPT && !self.is_op_return() + } } impl AbstractTxOut for DummyTxOutData { @@ -232,6 +241,7 @@ impl From for Box { } } +#[derive(Debug)] pub struct DummyTxOut { pub vout: usize, pub containing_tx: DummyTxData, @@ -247,3 +257,28 @@ impl TxConstituent for DummyTxOut { self.vout } } + +impl HasScriptPubkey for DummyTxOut { + fn script_pubkey_bytes(&self) -> Vec { + // Get the script pubkey from the corresponding output in the containing transaction + self.containing_tx + .outputs() + .nth(self.vout) + .map(|output| output.script_pubkey_bytes()) + .unwrap_or_default() + } + + fn is_spendable(&self) -> bool { + // Check for our dummy unspendable script or real OP_RETURN + let script = self.script_pubkey_bytes(); + script != DUMMY_UNSPENDABLE_SCRIPT && !self.is_op_return() + } +} + +impl TryFrom for crate::handle::SpendableTxConstituent { + type Error = DummyTxOut; + + fn try_from(value: DummyTxOut) -> Result { + Self::try_new(value) + } +} diff --git a/src/crates/primitives/src/traits/abstract_types.rs b/src/crates/primitives/src/traits/abstract_types.rs index 3e77588..e3bb206 100644 --- a/src/crates/primitives/src/traits/abstract_types.rs +++ b/src/crates/primitives/src/traits/abstract_types.rs @@ -92,6 +92,15 @@ pub trait HasScriptPubkey { fn output_type(&self) -> OutputType { classify_script_pubkey(&self.script_pubkey_bytes()) } + + fn is_op_return(&self) -> bool { + self.output_type() == OutputType::OpReturn + } + + /// Returns true if this output can be spent + fn is_spendable(&self) -> bool { + !self.is_op_return() + } } /// Transaction version