Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
fdceeba
initial commit
ArthurDeclercq Feb 24, 2024
5374ed8
finalize ms2 feature generation
ArthurDeclercq Feb 25, 2024
60207a3
add rustyms
ArthurDeclercq Feb 25, 2024
ae39844
remove exit statement fixed IM required value
ArthurDeclercq Feb 26, 2024
9b98c4d
change logger.info to debug
ArthurDeclercq Feb 26, 2024
5e45756
added profile decorator to get timings for functions
ArthurDeclercq Feb 26, 2024
304777c
removed profile as standard rescore debug statement
ArthurDeclercq Feb 26, 2024
95ee475
added new basic features
ArthurDeclercq Feb 26, 2024
73f4573
fixes for ms2 feature generator, removed multiprocessing
ArthurDeclercq Feb 26, 2024
947233e
return empty list on parsing error with rustyms, removed multiprocessing
ArthurDeclercq Feb 28, 2024
24ce565
add deeplc_calibration psm set
ArthurDeclercq Mar 15, 2024
114b006
Merge branch 'timsRescore' of https://github.com/compomics/ms2rescore…
ArthurDeclercq Apr 17, 2024
33c38b0
remove unused import
ArthurDeclercq Apr 17, 2024
40425c7
Merge branch 'timsRescore' of https://github.com/compomics/ms2rescore…
ArthurDeclercq Apr 19, 2024
b810b8c
Merge branch 'timsRescore' of https://github.com/compomics/ms2rescore…
ArthurDeclercq Apr 19, 2024
69b5d1a
Merge tag 'main' of https://github.com/compomics/ms2rescore into spec…
ArthurDeclercq Aug 16, 2024
6e2d102
Merge pull request #177 from compomics/main
ArthurDeclercq Aug 16, 2024
11fdc51
integrate mumble into ms2branch
ArthurDeclercq Aug 21, 2024
3140c44
Merge remote-tracking branch 'origin/main' into spectrum-feature-gene…
ArthurDeclercq Sep 23, 2024
883169a
temp removal of sage features before rescoring
ArthurDeclercq Sep 27, 2024
97865e7
Merge branch 'main' of https://github.com/compomics/ms2rescore into s…
ArthurDeclercq Sep 27, 2024
da39ae8
remove psm_file features when rescoring with mumble
ArthurDeclercq Nov 8, 2024
37fff28
linting
SamvPy Nov 19, 2024
e8b59f3
add hyperscore calculation
SamvPy Nov 19, 2024
c51cd34
calibration fixes
ArthurDeclercq Nov 21, 2024
295e37f
changes for mumble implementation
ArthurDeclercq Nov 21, 2024
909860d
change openms peptide formatting
SamvPy Nov 22, 2024
c5902c2
add mumble psm filtering functionality
ArthurDeclercq Nov 22, 2024
6eaceb2
Merge branch 'spectrum-feature-generator' of https://github.com/compo…
ArthurDeclercq Nov 22, 2024
5ce55f5
remove pyopenms dependency for hyperscore calculation
SamvPy Nov 22, 2024
986c5f6
fix spectrum_id accession
ArthurDeclercq Nov 22, 2024
bbecf6a
Merge branch 'spectrum-feature-generator' of https://github.com/compo…
ArthurDeclercq Nov 22, 2024
6fd6053
Merge remote-tracking branch 'origin/main' into spectrum-feature-gene…
paretje Jan 14, 2025
5333e46
remove unused imports
paretje Jan 17, 2025
dd2259f
remove unused import in deeplc feature generator
paretje Jan 17, 2025
d24ef30
add rustyms dependency
paretje Jan 17, 2025
21cafc7
drop rustyms requirement to 0.8.3
paretje Jan 17, 2025
ca9da7d
mumble related changes
ArthurDeclercq Jan 17, 2025
c5b6eb0
add mumble
paretje Jan 17, 2025
aee8ec7
update mumble to use user cache dir
paretje Jan 21, 2025
7ce56c2
bump im2deep dependency
paretje Jan 24, 2025
106ad8f
make mumble and rustyms optional dependancy
ArthurDeclercq Feb 14, 2025
72e2b71
Merge branch 'main' of https://github.com/compomics/ms2rescore into s…
ArthurDeclercq Jun 10, 2025
29aac8a
set defaults in mumble config
ArthurDeclercq Sep 23, 2025
487f661
fix rustyms 0.8.0 -> 0.10.0
SamvPy Dec 3, 2025
9a97ed1
Merge remote-tracking branch 'origin/main' into refactoring
ArthurDeclercq Dec 22, 2025
05078cd
moved maxquant features to ms2
ArthurDeclercq Dec 22, 2025
2011241
im2deep refactoring
ArthurDeclercq Dec 24, 2025
36750b4
ms2pip refactoring
ArthurDeclercq Dec 24, 2025
3091b0f
parsing spectra once and storing spectra objects
ArthurDeclercq Dec 24, 2025
5b3d4c4
directly operate on spectra objects instead of reacquiring them
ArthurDeclercq Dec 24, 2025
a3cbb1b
updated profiling
ArthurDeclercq Dec 24, 2025
a1df72d
removed maxquant generator from fg
ArthurDeclercq Dec 24, 2025
2c7a09b
changes to column names
ArthurDeclercq Jan 5, 2026
e855abf
changes to avoid out of memory error due to multiprocessing
ArthurDeclercq Jan 5, 2026
577df19
replace list with set to reduce lookup time to O(1)
ArthurDeclercq Jan 5, 2026
a80238b
remove unused imports
ArthurDeclercq Jan 12, 2026
abf66b4
migrate ms2 and ms2pip features to ms2rescore-rs
ArthurDeclercq Jan 12, 2026
a9108b9
reimplement deeplc feature calculation
ArthurDeclercq Jan 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ms2rescore/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Union

Expand Down Expand Up @@ -196,7 +197,13 @@ def profile(fnc, filepath):
def inner(*args, **kwargs):
with cProfile.Profile() as profiler:
return_value = fnc(*args, **kwargs)
profiler.dump_stats(filepath + ".profile.prof")

# Add timestamp to profiler output filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
profile_filename = f"{filepath}.profile_{timestamp}.prof"
profiler.dump_stats(profile_filename)
LOGGER.info(f"Profile data written to: {profile_filename}")

return return_value

return inner
Expand Down Expand Up @@ -248,6 +255,7 @@ def main(tims=False):
# Run MS²Rescore
try:
if config["ms2rescore"]["profile"]:
LOGGER.info("Profiling enabled")
profiled_rescore = profile(rescore, config["ms2rescore"]["output_path"])
profiled_rescore(configuration=config)
else:
Expand Down
19 changes: 16 additions & 3 deletions ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
from ms2rescore.parse_spectra import add_precursor_values
from ms2rescore.report import generate
from ms2rescore.rescoring_engines import mokapot, percolator
from ms2rescore.rescoring_engines.mokapot import add_peptide_confidence, add_psm_confidence
from ms2rescore.rescoring_engines.mokapot import (
add_peptide_confidence,
add_psm_confidence,
)
from ms2rescore.utils import filter_mumble_psms

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -114,6 +118,10 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
)
psm_list = psm_list[psms_with_features]

if "mumble" in config["psm_generator"]:
# Remove PSMs where matched_ions_pct drops 25% below the original hit
psm_list = filter_mumble_psms(psm_list, threshold=0.75)

# Write feature names to file
_write_feature_names(feature_names, output_file_root)

Expand Down Expand Up @@ -219,7 +227,10 @@ def _write_feature_names(feature_names, output_file_root):
def _log_id_psms_before(psm_list: PSMList, fdr: float = 0.01, max_rank: int = 1) -> int:
"""Log #PSMs identified before rescoring."""
id_psms_before = (
(psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
(psm_list["qvalue"] <= 0.01)
& (psm_list["rank"] <= max_rank)
& (~psm_list["is_decoy"])
& ([metadata.get("original_psm", True) for metadata in psm_list["metadata"]])
).sum()
logger.info(
f"Found {id_psms_before} identified PSMs with rank <= {max_rank} at {fdr} FDR before "
Expand Down Expand Up @@ -285,7 +296,9 @@ def _calculate_confidence(psm_list: PSMList) -> PSMList:
)

# Recalculate confidence
new_confidence = lin_psm_data.assign_confidence(scores=psm_list["score"])
new_confidence = lin_psm_data.assign_confidence(
scores=list(psm_list["score"])
) # explicitly make it a list to avoid TypingError: Failed in nopython mode pipeline (step: nopython frontend) in mokapot

# Add new confidence estimations to PSMList
add_psm_confidence(psm_list, new_confidence)
Expand Down
6 changes: 6 additions & 0 deletions ms2rescore/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,9 @@ class RescoringError(MS2RescoreError):
"""Error while rescoring PSMs."""

pass


class ParseSpectrumError(MS2RescoreError):
"""Error while parsing spectra."""

pass
4 changes: 2 additions & 2 deletions ms2rescore/feature_generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
from ms2rescore.feature_generators.deeplc import DeepLCFeatureGenerator
from ms2rescore.feature_generators.im2deep import IM2DeepFeatureGenerator
from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator
from ms2rescore.feature_generators.maxquant import MaxQuantFeatureGenerator
from ms2rescore.feature_generators.ms2 import MS2FeatureGenerator
from ms2rescore.feature_generators.ms2pip import MS2PIPFeatureGenerator

FEATURE_GENERATORS: dict[str, type[FeatureGeneratorBase]] = {
"basic": BasicFeatureGenerator,
"ms2pip": MS2PIPFeatureGenerator,
"deeplc": DeepLCFeatureGenerator,
"maxquant": MaxQuantFeatureGenerator,
"ionmob": IonMobFeatureGenerator,
"im2deep": IM2DeepFeatureGenerator,
"ms2": MS2FeatureGenerator,
}
13 changes: 13 additions & 0 deletions ms2rescore/feature_generators/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def add_features(self, psm_list: PSMList) -> None:
charge_states = np.array([psm.peptidoform.precursor_charge for psm in psm_list])
precursor_mzs = psm_list["precursor_mz"]
scores = psm_list["score"]
peptide_lengths = np.array([len(psm.peptidoform.sequence) for psm in psm_list])

has_charge = None not in charge_states
has_mz = None not in precursor_mzs and has_charge
Expand All @@ -74,13 +75,25 @@ def add_features(self, psm_list: PSMList) -> None:
if has_score:
self._feature_names.append("search_engine_score")

if has_mz and has_charge:
experimental_mass = (precursor_mzs * charge_n) - (charge_n * 1.007276466812)
theoretical_mass = (theo_mz * charge_n) - (charge_n * 1.007276466812)
mass_error = experimental_mass - theoretical_mass
self._feature_names.extend(["theoretical_mass", "experimental_mass", "mass_error"])

self._feature_names.append("pep_len")

for i, psm in enumerate(psm_list):
psm.rescoring_features.update(
dict(
**{"charge_n": charge_n[i]} if has_charge else {},
**charge_one_hot[i] if has_charge else {},
**{"abs_ms1_error_ppm": abs_ms1_error_ppm[i]} if has_mz else {},
**{"search_engine_score": scores[i]} if has_score else {},
**{"theoretical_mass": theoretical_mass[i]} if has_mz and has_charge else {},
**{"experimental_mass": experimental_mass[i]} if has_mz and has_charge else {},
**{"mass_error": mass_error[i]} if has_mz and has_charge else {},
**{"pep_len": peptide_lengths[i]},
)
)

Expand Down
Loading
Loading