Skip to content

Commit

Permalink
Make use of isotope label information in InChi descriptions when read…
Browse files Browse the repository at this point in the history
…ing transition lists and .msp files

Sometimes isotope label information can be found in InChI descriptions, e.g. the "/i1D3" isotopic layer in "InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1D3", which tells us that three of the hydrogens in C8H8O are deuterium. Previously Skyline did not look for these, which could lead to confusing mismatches between declared and calculated m/z values.
  • Loading branch information
bspratt authored Jan 23, 2025
1 parent 7afd2b8 commit 61c29e1
Show file tree
Hide file tree
Showing 10 changed files with 459 additions and 92 deletions.
56 changes: 56 additions & 0 deletions pwiz_tools/Skyline/Model/CustomMolecule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ public static Dictionary<string, string> FormatAccessionNumbers(string keysTSV,
return keys;
}

/// <summary>
/// Builds a MoleculeAccessionNumbers from a dictionary of accession values.
/// </summary>
/// <param name="accessions">Accession dictionary; may be null or empty.</param>
/// <returns>EMPTY when there is nothing to wrap, otherwise a new instance built from the dictionary.</returns>
public static MoleculeAccessionNumbers Create(Dictionary<string, string> accessions)
{
    // A null or zero-length dictionary carries no information, so hand back the shared EMPTY value
    var hasAccessions = accessions != null && accessions.Count != 0;
    return hasAccessions ? new MoleculeAccessionNumbers(accessions) : EMPTY;
}

public MoleculeAccessionNumbers(string keysTSV, string inChiKey = null)
{
var keys = FormatAccessionNumbers(keysTSV, inChiKey);
Expand All @@ -115,6 +125,52 @@ public MoleculeAccessionNumbers(string keysTSV, string inChiKey = null)
AccessionNumbers = ImmutableSortedList<string, string>.FromValues(keys, ACCESSION_TYPE_SORTER);
}

// Matches one entry of an InChI isotopic layer: an atom position followed by an isotope symbol and
// an optional trailing number, e.g. "1C13" (weight), "1D3" (count) or "1D" (implied count of 1).
private static readonly Regex REGEX_INCHI_ISOTOPES = new Regex(@"\d+([A-Za-z]+\d*)", // Position, isotope, weight (or isotope and optional count, for D and T)
    RegexOptions.CultureInvariant | RegexOptions.Compiled); // N.B. we ignore position, as we don't carry that much structure detail

/// <summary>
/// Look for isotope labels buried in descriptions, e.g. InChI's /i layer.
/// </summary>
/// <returns>
/// A map from isotope symbol (the value found in Adduct.DICT_ADDUCT_ISOTOPE_NICKNAMES, e.g. "C13" => "C'")
/// to the number of labeled atoms, or null if there is no InChI, no /i layer, or no recognized label.
/// </returns>
public Dictionary<string, int> FindLabels()
{
    var inchi = GetInChI();
    if (string.IsNullOrEmpty(inchi))
    {
        return null; // No InChI at all
    }

    // e.g. InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1D3 (replace 3 H with H')
    // e.g. InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1C13,2C13,3C13,4C13 (replace C with C' at positions 1,2,3, and 4)
    // Layers are identified by their prefix letter, not by position: an InChI may omit layers
    // (e.g. no /c or /h) or carry extra ones ahead of /i, so find the layer that starts with 'i'.
    var parts = inchi.Split('/');
    string isotopeLayer = null;
    for (var i = 1; i < parts.Length; i++) // parts[0] is the "InChI=..." version prefix
    {
        if (parts[i].Length > 0 && parts[i][0] == 'i')
        {
            isotopeLayer = parts[i];
            break;
        }
    }

    if (isotopeLayer == null)
    {
        return null; // No /i section
    }

    Dictionary<string, int> result = null;
    foreach (Match match in REGEX_INCHI_ISOTOPES.Matches(isotopeLayer))
    {
        var isotope = match.Groups[1].Value;
        var count = 1;
        if (isotope.StartsWith(BioMassCalc.D) || isotope.StartsWith(BioMassCalc.T)) // e.g. "D3" in ".../i1D3" (replace 3 H with H')
        {
            var suffix = isotope.Substring(1);
            if (suffix.Length == 0)
            {
                isotope = isotope.Substring(0, 1); // Bare "D" or "T" means a single labeled hydrogen
            }
            else if (int.TryParse(suffix, out var parsedCount)) // Get the count e.g. 3 in "/i1T3"
            {
                count = parsedCount;
                isotope = isotope.Substring(0, 1);
            }
            // Otherwise this is some longer symbol that merely starts with D or T (e.g. "Tc99"),
            // so leave it intact for the nickname lookup below rather than treating it as D or T
        }
        if (Adduct.DICT_ADDUCT_ISOTOPE_NICKNAMES.TryGetValue(isotope, out var skylineIsotope)) // e.g. "C13" => "C'"
        {
            result ??= new Dictionary<string, int>();
            result.TryGetValue(skylineIsotope, out var existing); // existing is 0 when not yet seen
            result[skylineIsotope] = existing + count;
        }
    }

    return result;
}

public bool IsEmpty {
get
{
Expand Down
10 changes: 10 additions & 0 deletions pwiz_tools/Skyline/Model/Lib/LibResources.designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pwiz_tools/Skyline/Model/Lib/LibResources.resx
Original file line number Diff line number Diff line change
Expand Up @@ -344,4 +344,7 @@
<data name="NistLibraryBase_GetMod_Unknown_modification__0__at_line__1_" xml:space="preserve">
<value>Unknown modification {0} at line {1}</value>
</data>
<data name="NistLibraryBase_CreateCache_Missing_details_for__0__at_line__1___this_entry_will_be_ignored" xml:space="preserve">
<value>Missing details for {0} at line {1}, this entry will be ignored</value>
</data>
</root>
8 changes: 8 additions & 0 deletions pwiz_tools/Skyline/Model/Lib/Library.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2142,6 +2142,14 @@ public static SmallMoleculeLibraryAttributes Create(string moleculeName, string
return Create(moleculeName, chemicalFormulaOrMassesString, inChiKey, otherKeys == null ? string.Empty : string.Join(@"\t", otherKeys.Select(kvp => kvp.Key + @":" + kvp.Value)));
}

/// <summary>
/// Creates library attributes from a single dictionary of accession values, pulling the InChiKey
/// entry (if any) out into its own argument and passing the remaining entries as "key:value" pairs.
/// </summary>
/// <param name="moleculeName">Molecule name, passed through unchanged.</param>
/// <param name="chemicalFormulaOrMassesString">Formula or masses string, passed through unchanged.</param>
/// <param name="accessions">Accession values keyed by type; may be null, which is treated as no accessions.</param>
public static SmallMoleculeLibraryAttributes Create(string moleculeName, string chemicalFormulaOrMassesString,
    IDictionary<string, string> accessions)
{
    string inChiKey = null;
    var otherKeys = string.Empty;
    if (accessions != null) // Tolerate a null dictionary, as the overload taking otherKeys does
    {
        accessions.TryGetValue(MoleculeAccessionNumbers.TagInChiKey, out inChiKey);
        // NOTE(review): @"\t" is a verbatim literal (backslash + 't'), kept identical to the sibling overload - confirm this is the intended separator
        otherKeys = string.Join(@"\t",
            accessions.Where(kvp => kvp.Key != MoleculeAccessionNumbers.TagInChiKey)
                .Select(kvp => kvp.Key + @":" + kvp.Value));
    }

    return Create(moleculeName, chemicalFormulaOrMassesString, inChiKey, otherKeys);
}

public static SmallMoleculeLibraryAttributes Create(string moleculeName, string chemicalFormulaOrMassesString,
string inChiKey, string otherKeys)
{
Expand Down
Loading

0 comments on commit 61c29e1

Please sign in to comment.