@@ -3179,125 +3179,133 @@ private List<ChromGroupHeaderInfo> tryLoadChromatogram(
31793179 GeneralPrecursor <?> precursor ,
31803180 double tolerance )
31813181 {
3182- // Add precursor matches to a list, if they match at least 1 transition
3183- // in this group, and are potentially the maximal transition match.
3184-
3185- // Using only the maximum works well for the case where there are 2
3186- // precursors in the same document that match a single entry.
3187- // TODO: But it messes up when there are 2 sets of transitions for
3188- // the same precursor covering different numbers of transitions.
3189- // Skyline never creates this case, but it has been reported
3190- // int maxTranMatch = 1;
3191-
31923182 if (_binaryParser != null && _binaryParser .getChromatograms () != null )
31933183 {
31943184 // ChromatogramCache.TryLoadChromInfo() in Skyline code:
31953185 // Filter the list of chromatograms based on our precursor mZ
3196- int i = findEntry (precursor .getSignedMz (), tolerance , _binaryParser .getChromatograms (), 0 , _binaryParser .getChromatograms ().length - 1 );
3186+ ChromGroupHeaderInfo [] chromHeaders = _binaryParser .getChromatograms ();
3187+ int i = findEntry (precursor .getSignedMz (), tolerance , chromHeaders , 0 , chromHeaders .length - 1 );
31973188 if (i == -1 )
31983189 {
31993190 return Collections .emptyList ();
32003191 }
32013192
32023193 Double explicitRT = molecule .getExplicitRetentionTime ();
32033194
3195+ List <ChromGroupHeaderInfo > result = new ArrayList <>();
3196+
32043197 // Add entries to a list until they no longer match
3205- List <ChromGroupHeaderInfo > listChromatograms = new ArrayList <>();
3206- while (i < _binaryParser .getChromatograms ().length &&
3207- matchMz (precursor .getSignedMz (), _binaryParser .getChromatograms ()[i ].getPrecursor (), tolerance ))
3198+ while (i < chromHeaders .length &&
3199+ matchMz (precursor .getSignedMz (), chromHeaders [i ].getPrecursor (), tolerance ))
32083200 {
3209- ChromGroupHeaderInfo chrom = _binaryParser .getChromatograms ()[i ++];
3201+ ChromGroupHeaderInfo chrom = chromHeaders [i ++];
3202+ // If explicit retention time info is available, use that to discard obvious mismatches
3203+ if (explicitRT != null && chrom .excludesTime (explicitRT ))
3204+ {
3205+ continue ;
3206+ }
3207+
32103208 // Sequence matching for extracted chromatogram data added in v1.5
32113209 ChromatogramGroupId chromTextId = _binaryParser .getTextId (chrom );
3212- if (chromTextId != null )
3210+ if (chromTextId != null )
32133211 {
3212+ // If we match based on textId, consider it a chromatogram worth storing
32143213 if (!molecule .targetMatches (chromTextId .getTarget ()))
3214+ {
32153215 continue ;
3216+ }
32163217 try
32173218 {
32183219 SpectrumFilter spectrumFilter = SpectrumFilter .fromByteArray (precursor .getSpectrumFilter ());
3219- if (!Objects .equals (spectrumFilter , chromTextId .getSpectrumFilter ()))
3220+ if (!Objects .equals (spectrumFilter , chromTextId .getSpectrumFilter ()))
32203221 {
32213222 continue ;
32223223 }
32233224 }
32243225 catch (InvalidProtocolBufferException e )
32253226 {
3226- _log .warn ("Error parsing spectrum filter {} " , e );
3227- return Collections . emptyList () ;
3227+ _log .warn ("Error parsing spectrum filter" , e );
3228+ continue ;
32283229 }
32293230 }
3230-
3231- // If explicit retention time info is available, use that to discard obvious mismatches
3232- if (explicitRT == null || !chrom .excludesTime (explicitRT ))
3233- {
3234- listChromatograms .add (chrom );
3235- }
3231+ result .add (chrom );
32363232 }
3233+ return findChromatogramsWithMostTransitions (precursor .getMz (), transitions , result );
3234+ }
32373235
3238- // MeasuredResults.TryLoadChromatogram in Skyline code:
3239- // Since we are reading and returning chromatograms for all replicates we need to maintain
3240- // the number of maximum transition matches for each replicate.
3241- // MeasuredResults.TryLoadChromatogram in Skyline reads and returns chromatograms for a single replicate.
3242- int [] maxTranMatches = new int [_binaryParser .getCacheFileSize ()];
3236+ return Collections .emptyList ();
3237+ }
32433238
3244- ChromGroupHeaderInfo [] chromArray = new ChromGroupHeaderInfo [_binaryParser .getCacheFileSize ()];
32453239
3246- for (ChromGroupHeaderInfo chromInfo : listChromatograms )
3247- {
3248- // If the chromatogram set has an optimization function then the number
3249- // of matching chromatograms per transition is a reflection of better
3250- // matching. Otherwise, we only expect one match per transition.
3251- // TODO - do we need this on the Java side?
3252- boolean multiMatch = false ;//chromatogram.OptimizationFunction != null;
3240+ /**
3241+ * Within each replicate, if there is more than one ChromGroupHeaderInfo, find the header infos with the most matching
3242+ * transitions, and, if there are multiple headers with the same number of matching transitions, then find the ones
3243+ * that have the closest match to the precursor m/z.
+ * <p>
+ * Header infos are grouped by the file path reported by {@code _binaryParser.getFilePath}, and each replicate in
+ * {@code _replicateList} gathers the groups whose path equals the path of one of its sample files. Header infos
+ * whose file path matches no sample file are not returned. A replicate with zero or one candidate contributes its
+ * candidates unchanged. Otherwise only the candidates with the highest
+ * {@code _binaryParser.countTransitionMatches} count are kept; if several remain, only those whose precursor m/z
+ * is at the minimum absolute distance from {@code precursorMz} are kept (exact ties are all returned).
+ *
+ * @param precursorMz m/z used to break ties between candidates with equal transition-match counts
+ * @param transitions transitions passed to {@code countTransitionMatches} for each candidate
+ * @param headerInfos candidate chromatogram group headers to choose from
+ * @return a new list holding the selected header infos, appended replicate by replicate in {@code _replicateList} order
3244+ */
3245+ private List <ChromGroupHeaderInfo > findChromatogramsWithMostTransitions (
3246+ double precursorMz , List <? extends GeneralTransition > transitions , List <ChromGroupHeaderInfo > headerInfos )
3247+ {
3248+ Map <String , List <ChromGroupHeaderInfo >> byFile = headerInfos .stream ()
3249+ .collect (Collectors .groupingBy (headerInfo -> _binaryParser .getFilePath (headerInfo ))); // NOTE(review): groupingBy rejects null keys - assumes getFilePath never returns null; confirm
32533250
3254- int tranMatch = _binaryParser . matchTransitions ( chromInfo , transitions , explicitRT , tolerance , multiMatch );
3251+ List < ChromGroupHeaderInfo > result = new ArrayList <>( );
32553252
3256- int fileIndex = chromInfo .getFileIndex ();
3257- int maxTranMatch = maxTranMatches [fileIndex ];
3253+ for (SkylineReplicate skylineReplicate : _replicateList ) // selection is done independently for each replicate
3254+ {
3255+ List <ChromGroupHeaderInfo > candidates = new ArrayList <>();
32583256
3259- if (tranMatch >= maxTranMatch )
3257+ for (SampleFile sampleFile : skylineReplicate .getSampleFileList ())
3258+ {
3259+ List <ChromGroupHeaderInfo > listInFile = byFile .get (sampleFile .getFilePath ());
3260+ if (listInFile != null )
32603261 {
3261- // If new maximum, clear anything collected at the previous maximum
3262- if (tranMatch > maxTranMatch )
3263- {
3264- chromArray [fileIndex ] = null ;
3265- }
3262+ candidates .addAll (listInFile );
3263+ }
3264+ }
32663265
3267- maxTranMatches [fileIndex ] = tranMatch ;
3266+ if (candidates .size () <= 1 ) // nothing to rank: keep the lone candidate (or none) as is
3267+ {
3268+ result .addAll (candidates );
3269+ continue ;
3270+ }
32683271
3269- if (chromArray [fileIndex ] != null )
3270- {
3271- // If more than one value was found, ensure that there
3272- // is only one precursor match per file.
3273- // Use the entry with the m/z closest to the target
3274- ChromGroupHeaderInfo currentChromForFileIndex = chromArray [fileIndex ];
3275- // Use the entry with the m/z closest to the target
3276- if (Math .abs (precursor .getMz () - chromInfo .getPrecursorMz ()) <
3277- Math .abs (precursor .getMz () - currentChromForFileIndex .getPrecursorMz ()))
3278- {
3279- chromArray [fileIndex ] = chromInfo ;
3280- }
3281- }
3282- else
3283- {
3284- chromArray [fileIndex ] = chromInfo ;
3285- }
3272+ int [] transitionCounts = new int [candidates .size ()];
3273+ int maxTransitionCount = 0 ; // starts at 0, so candidates with zero matches all tie and go on to the m/z tie-break
3274+ for (int i = 0 ; i < candidates .size (); i ++)
3275+ {
3276+ int transitionCount = _binaryParser .countTransitionMatches (candidates .get (i ), transitions , _matchTolerance ); // NOTE(review): uses the _matchTolerance field; no tolerance is passed into this method - confirm this matches the caller's tolerance
3277+ transitionCounts [i ] = transitionCount ;
3278+ maxTransitionCount = Math .max (transitionCount , maxTransitionCount );
3279+ }
3280+ List <ChromGroupHeaderInfo > candidatesWithMostTransitions = new ArrayList <ChromGroupHeaderInfo >();
3281+ for (int i = 0 ; i < candidates .size (); i ++)
3282+ {
3283+ if (transitionCounts [i ] == maxTransitionCount )
3284+ {
3285+ candidatesWithMostTransitions .add (candidates .get (i ));
32863286 }
32873287 }
3288+ if (candidatesWithMostTransitions .size () <= 1 )
3289+ {
3290+ result .addAll (candidatesWithMostTransitions );
3291+ continue ;
3292+ }
32883293
3289- List <ChromGroupHeaderInfo > finalList = new ArrayList <>();
3290- for (ChromGroupHeaderInfo info : chromArray )
3294+ // If multiple chromGroups tied for the number of matching transitions, then break the tie using
3295+ // precursor m/z distance
3296+ double minMzDelta = candidatesWithMostTransitions .stream ()
3297+ .mapToDouble (headerInfo -> Math .abs (precursorMz - headerInfo .getPrecursorMz ()))
3298+ .min ().getAsDouble (); // the list has more than one element here, so min() is always present
3299+ for (ChromGroupHeaderInfo headerInfo : candidatesWithMostTransitions )
32913300 {
3292- if (info != null )
3301+ if (Math . abs ( precursorMz - headerInfo . getPrecursorMz ()) == minMzDelta ) // exact equality: every candidate at the minimum distance is kept
32933302 {
3294- finalList .add (info );
3303+ result .add (headerInfo );
32953304 }
32963305 }
3297- return finalList ;
32983306 }
32993307
3300- return Collections . emptyList () ;
3308+ return result ;
33013309 }
33023310
33033311 public int getPeptideGroupCount ()
0 commit comments