@@ -3,7 +3,7 @@ use std::cmp::Ordering;
33use crate :: cigar:: mate_unclipped_5prime;
44use crate :: cigar:: unclipped_5prime_sort;
55use crate :: fields:: { self , flags, mate_pos, mate_ref_id, pos, read_name, ref_id} ;
6- use crate :: tags:: { find_mc_tag_in_record, find_mi_tag_in_record} ;
6+ use crate :: tags:: { find_mc_tag_in_record, find_mi_tag_in_record, find_string_tag_in_record } ;
77
88#[ must_use]
99pub fn compare_coordinate_raw ( a : & [ u8 ] , b : & [ u8 ] ) -> Ordering {
@@ -57,9 +57,15 @@ pub fn compare_queryname_raw(a: &[u8], b: &[u8]) -> Ordering {
5757/// Compare for template-coordinate ordering using raw bytes.
5858///
5959/// This matches samtools' template-coordinate sorting which uses unclipped 5' positions.
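60+ /// When `cell_tag` is `Some`, the string tag it names (e.g. CB, the cellular barcode) is compared
61+ /// after neg2 and before MI: shorter values first, then lexicographically, matching fgbio's sort order. A record without the tag is treated as having an empty value.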
6062#[ inline]
6163#[ must_use]
62- pub fn compare_template_coordinate_raw ( a : & [ u8 ] , b : & [ u8 ] ) -> Ordering {
64+ pub fn compare_template_coordinate_raw (
65+ a : & [ u8 ] ,
66+ b : & [ u8 ] ,
67+ cell_tag : Option < & [ u8 ; 2 ] > ,
68+ ) -> Ordering {
6369 // Extract all needed fields from both records
6470 let a_tid = ref_id ( a) ;
6571 let a_pos = pos ( a) ;
@@ -133,6 +139,16 @@ pub fn compare_template_coordinate_raw(a: &[u8], b: &[u8]) -> Ordering {
133139 ( false , true ) => Ordering :: Greater ,
134140 _ => Ordering :: Equal ,
135141 } )
142+ . then_with ( || {
143+ // CB (cellular barcode): length-first, then lexicographic (matching fgbio)
144+ if let Some ( tag) = cell_tag {
145+ let a_cb = find_string_tag_in_record ( a, tag) . unwrap_or ( b"" ) ;
146+ let b_cb = find_string_tag_in_record ( b, tag) . unwrap_or ( b"" ) ;
147+ a_cb. len ( ) . cmp ( & b_cb. len ( ) ) . then_with ( || a_cb. cmp ( b_cb) )
148+ } else {
149+ Ordering :: Equal
150+ }
151+ } )
136152 . then_with ( || compare_mi_tags_raw ( a, b) )
137153 . then_with ( || compare_names_raw ( a, b) )
138154 . then_with ( || a_upper. cmp ( & b_upper) )
@@ -201,6 +217,7 @@ fn compare_mi_tags_raw(a: &[u8], b: &[u8]) -> Ordering {
201217mod tests {
202218 use super :: * ;
203219 use crate :: testutil:: * ;
220+ use rstest:: rstest;
204221 use std:: cmp:: Ordering ;
205222
206223 // ========================================================================
@@ -319,7 +336,7 @@ mod tests {
319336 -1 ,
320337 & [ ] ,
321338 ) ;
322- assert_eq ! ( compare_template_coordinate_raw( & rec, & rec) , Ordering :: Equal ) ;
339+ assert_eq ! ( compare_template_coordinate_raw( & rec, & rec, None ) , Ordering :: Equal ) ;
323340 }
324341
325342 #[ test]
@@ -329,8 +346,8 @@ mod tests {
329346 let cigar = & [ ( 10 << 4 ) | 0 ] ; // 10M
330347 let a = make_bam_bytes ( 0 , 100 , 0 , b"rea" , cigar, 10 , -1 , -1 , & [ ] ) ;
331348 let b = make_bam_bytes ( 2 , 100 , 0 , b"rea" , cigar, 10 , -1 , -1 , & [ ] ) ;
332- assert_eq ! ( compare_template_coordinate_raw( & a, & b) , Ordering :: Less ) ;
333- assert_eq ! ( compare_template_coordinate_raw( & b, & a) , Ordering :: Greater ) ;
349+ assert_eq ! ( compare_template_coordinate_raw( & a, & b, None ) , Ordering :: Less ) ;
350+ assert_eq ! ( compare_template_coordinate_raw( & b, & a, None ) , Ordering :: Greater ) ;
334351 }
335352
336353 #[ test]
@@ -339,7 +356,7 @@ mod tests {
339356 let a = make_bam_bytes ( -1 , -1 , flags:: UNMAPPED , b"aaa" , & [ ] , 0 , -1 , -1 , & [ ] ) ;
340357 let b = make_bam_bytes ( -1 , -1 , flags:: UNMAPPED , b"zzz" , & [ ] , 0 , -1 , -1 , & [ ] ) ;
341358 // Both fully unmapped, compare by name
342- assert_eq ! ( compare_template_coordinate_raw( & a, & b) , Ordering :: Less ) ;
359+ assert_eq ! ( compare_template_coordinate_raw( & a, & b, None ) , Ordering :: Less ) ;
343360 }
344361
345362 #[ test]
@@ -350,7 +367,7 @@ mod tests {
350367 let b = make_bam_bytes ( 0 , 200 , flags:: PAIRED , b"rea" , cigar, 10 , 0 , 100 , & [ ] ) ;
351368 // a is unmapped with mate at tid=0,pos=100; b is mapped at tid=0,pos=200
352369 // a sorts by mate position (100) which is < b's position (200)
353- let cmp = compare_template_coordinate_raw ( & a, & b) ;
370+ let cmp = compare_template_coordinate_raw ( & a, & b, None ) ;
354371 assert_ne ! ( cmp, Ordering :: Equal ) ;
355372 }
356373
@@ -381,7 +398,7 @@ mod tests {
381398 & [ ] ,
382399 ) ;
383400 // Both mapped, mates unmapped. a at pos 100 < b at pos 200
384- assert_eq ! ( compare_template_coordinate_raw( & a, & b) , Ordering :: Less ) ;
401+ assert_eq ! ( compare_template_coordinate_raw( & a, & b, None ) , Ordering :: Less ) ;
385402 }
386403
387404 #[ test]
@@ -394,7 +411,7 @@ mod tests {
394411 let b = make_bam_bytes ( 0 , 100 , flags:: PAIRED , b"rea" , cigar, 10 , 0 , 200 , & [ ] ) ;
395412 // After canonical ordering, both should produce same (tid1=0,tid2=0,pos1=~100,pos2=~200)
396413 // so they compare equal on positions, differentiated by is_upper
397- let cmp = compare_template_coordinate_raw ( & a, & b) ;
414+ let cmp = compare_template_coordinate_raw ( & a, & b, None ) ;
398415 // a is_upper=true, b is_upper=false -> a > b (true > false)
399416 assert_eq ! ( cmp, Ordering :: Greater ) ;
400417 }
@@ -428,7 +445,7 @@ mod tests {
428445 & [ ] ,
429446 ) ;
430447 // Reverse strand sorts before forward in samtools convention (neg1=true < neg1=false)
431- let cmp = compare_template_coordinate_raw ( & a, & b) ;
448+ let cmp = compare_template_coordinate_raw ( & a, & b, None ) ;
432449 assert_ne ! ( cmp, Ordering :: Equal ) ;
433450 }
434451
@@ -467,7 +484,38 @@ mod tests {
467484 & aux_b,
468485 ) ;
469486 // MI 10 < MI 20
470- assert_eq ! ( compare_template_coordinate_raw( & a, & b) , Ordering :: Less ) ;
487+ assert_eq ! ( compare_template_coordinate_raw( & a, & b, None ) , Ordering :: Less ) ;
488+ }
489+
490+ // ========================================================================
491+ // compare_template_coordinate_raw: CB (cell barcode) tag tests
492+ // ========================================================================
493+
494+ /// CB ordering cases for compare_template_coordinate_raw: records `a` and `b` are built with identical fields and differ only in their aux bytes (`aux_a` / `aux_b`), so the expected ordering is driven by the aux data and `cell_tag`.
495+ #[ rstest]
496+ // Same-length CBs fall through to the lexicographic comparison: AAAA < BBBB
497+ #[ case( b"CBZ\x41 \x41 \x41 \x41 \x00 " . as_slice( ) , b"CBZ\x42 \x42 \x42 \x42 \x00 " . as_slice( ) , Some ( b"CB" ) , Ordering :: Less , "AAAA < BBBB" ) ]
498+ // With cell_tag = None the CB step yields Equal, so these otherwise-identical records compare Equal
499+ #[ case( b"CBZ\x41 \x41 \x41 \x41 \x00 " . as_slice( ) , b"CBZ\x42 \x42 \x42 \x42 \x00 " . as_slice( ) , None , Ordering :: Equal , "CB ignored without cell_tag" ) ]
500+ // A record with no CB tag is compared as an empty value (length 0), so it sorts before a present CB
501+ #[ case( b"" . as_slice( ) , b"CBZ\x41 \x41 \x41 \x41 \x00 " . as_slice( ) , Some ( b"CB" ) , Ordering :: Less , "no CB < has CB" ) ]
502+ // Length is compared before content: the shorter CB sorts first
503+ #[ case( b"CBZA\x00 " . as_slice( ) , b"CBZAA\x00 " . as_slice( ) , Some ( b"CB" ) , Ordering :: Less , "A < AA by length" ) ]
504+ fn test_compare_template_coordinate_raw_cb_ordering (
505+ #[ case] aux_a : & [ u8 ] ,
506+ #[ case] aux_b : & [ u8 ] ,
507+ #[ case] cell_tag : Option < & [ u8 ; 2 ] > ,
508+ #[ case] expected : Ordering ,
509+ #[ case] msg : & str ,
510+ ) {
511+ let cigar = & [ ( 10 << 4 ) | 0 ] ; // 10M
512+ let a = make_bam_bytes (
513+ 0 , 100 , flags:: PAIRED | flags:: MATE_UNMAPPED , b"rea" , cigar, 10 , -1 , -1 , aux_a,
514+ ) ;
515+ let b = make_bam_bytes (
516+ 0 , 100 , flags:: PAIRED | flags:: MATE_UNMAPPED , b"rea" , cigar, 10 , -1 , -1 , aux_b,
517+ ) ;
518+ assert_eq ! ( compare_template_coordinate_raw( & a, & b, cell_tag) , expected, "{msg}" ) ;
471519 }
472520
473521 // ========================================================================
0 commit comments