From e60e167796dfa8b1b49eeccc7676a04aca403d60 Mon Sep 17 00:00:00 2001 From: tristan-ranff Date: Fri, 14 Oct 2022 10:12:41 +0200 Subject: [PATCH] rename unify csv and remove 2 legacy parameters --- jsons/parameters.json | 246 +++++++++++++++++++----------------------- jsons/styles.json | 16 +-- 2 files changed, 116 insertions(+), 146 deletions(-) diff --git a/jsons/parameters.json b/jsons/parameters.json index e423c56..e793349 100644 --- a/jsons/parameters.json +++ b/jsons/parameters.json @@ -68,11 +68,11 @@ "unimod_name": "Methylpyrroline" } }, - "description": "Unusual aminoacids that are not accepted (e.g. by unify_csv_1_0_0), but reported by some engines. Given as a dictionary mapping on he original_aa as well as the unimod modification name. U is now accepted as regular amino acid (2017/03/30).In Tag Graph this can be used to define amino acids other thanthe standard 20 to be included in the search.For those, chemical composition, monoisotopic mass and avg massas well as name and 3-letter code need to be given.", + "description": "Unusual amino acids that are not accepted (e.g. by pyProtista), but reported by some engines. Given as a dictionary mapping on the original_aa as well as the unimod modification name. 
U is now accepted as regular amino acid (2017/03/30). In Tag Graph this can be used to define amino acids other than the standard 20 to be included in the search. For those, chemical composition, monoisotopic mass and avg mass as well as name and 3-letter code need to be given.", "key_translations": { "compomics_utilities_style_1": "aa_exception_dict", + "pyprotista_style_1": "aa_exception_dict", "tag_graph_style_1": "Amino Acids", - "unify_csv_style_1": "aa_exception_dict", "upeptide_mapper_style_1": "aa_exception_dict", "ursgal_style_1": "aa_exception_dict" }, @@ -375,11 +375,11 @@ "percolator_style_1": "bigger_scores_better", "percolator_style_2": "bigger_scores_better", "ptminer_style_1": "bigger_scores_better", + "pyprotista_style_1": "bigger_scores_better", "q_value_calculator_style_1": "bigger_scores_better", "qvality_style_1": "-r", "sanitize_csv_style_1": "bigger_scores_better", "svm_style_1": "bigger_scores_better", - "unify_csv_style_1": "bigger_scores_better", "ursgal_style_1": "bigger_scores_better" }, "name": "bigger_scores_better", @@ -1213,7 +1213,11 @@ true ] ], - "q_value_calculator_style_1": [ + "pyprotista_style_1": [ + [ + "comet_2020_01_4", + false + ], [ "deepnovo_0_0_1", true @@ -1282,6 +1286,10 @@ "msamanda_2_0_0_14665", true ], + [ + "msamanda_2_0_0_17442", + true + ], [ "msamanda_2_0_0_9695", true @@ -1415,7 +1423,7 @@ true ] ], - "qvality_style_1": [ + "q_value_calculator_style_1": [ [ "deepnovo_0_0_1", true @@ -1617,7 +1625,7 @@ true ] ], - "sanitize_csv_style_1": [ + "qvality_style_1": [ [ "deepnovo_0_0_1", true @@ -1819,7 +1827,7 @@ true ] ], - "svm_style_1": [ + "sanitize_csv_style_1": [ [ "deepnovo_0_0_1", true @@ -1976,6 +1984,10 @@ "pglyco_db_2_2_0", true ], + [ + "pglyco_db_2_2_2", + true + ], [ "pipi_1_4_5", true @@ -1984,6 +1996,10 @@ "pipi_1_4_6", true ], + [ + "pnovo_3_1_3", + true + ], [ "tag_graph_1_8_0", true @@ -2013,11 +2029,7 @@ true ] ], - "unify_csv_style_1": [ - [ - "comet_2020_01_4", - false - ], + "svm_style_1": [ [ 
"deepnovo_0_0_1", true @@ -2086,10 +2098,6 @@ "msamanda_2_0_0_14665", true ], - [ - "msamanda_2_0_0_17442", - true - ], [ "msamanda_2_0_0_9695", true @@ -2178,10 +2186,6 @@ "pglyco_db_2_2_0", true ], - [ - "pglyco_db_2_2_2", - true - ], [ "pipi_1_4_5", true @@ -2190,10 +2194,6 @@ "pipi_1_4_6", true ], - [ - "pnovo_3_1_3", - true - ], [ "tag_graph_1_8_0", true @@ -3556,8 +3556,8 @@ "pglyco_db_style_1": "fasta", "pipi_style_1": "db", "ptminer_style_1": "protein_database", + "pyprotista_style_1": "database", "tag_graph_style_1": "fmindex", - "unify_csv_style_1": "database", "upeptide_mapper_style_1": "database", "ursgal_style_1": "database", "xtandem_style_1": "file URL" @@ -3711,7 +3711,7 @@ "percolator_style_1": "-P", "percolator_style_2": "-P", "ptminer_style_1": "decoy_tag", - "unify_csv_style_1": "decoy_tag", + "pyprotista_style_1": "decoy_tag", "upeptide_mapper_style_1": "decoy_tag", "ursgal_style_1": "decoy_tag", "xtandem2csv_style_1": "decoy_tag" @@ -3981,7 +3981,7 @@ "merge_analytical_replicates_style_1": "delimiter", "percolator_style_1": "delimiter", "percolator_style_2": "delimiter", - "unify_csv_style_1": "delimiter", + "pyprotista_style_1": "delimiter", "upeptide_mapper_style_1": "delimiter", "ursgal_style_1": "delimiter" }, @@ -4339,8 +4339,8 @@ "pglyco_db_style_1": "enzyme", "pipi_style_1": "enzyme", "pnovo_style_1": "enzyme", + "pyprotista_style_1": "enzyme", "tag_graph_style_1": "Enzyme", - "unify_csv_style_1": "enzyme", "ursgal_style_1": "enzyme", "xtandem_style_1": "protein, cleavage site" }, @@ -5526,37 +5526,7 @@ "Trypsin KR _ C" ] ], - "tag_graph_style_1": [ - [ - "argc", - "R;[^P].*" - ], - [ - "aspn", - ".*;D" - ], - [ - "formic_acid", - "D;[^P].*" - ], - [ - "gluc", - "D|E;[^P].*" - ], - [ - "lysc", - "K;[^P].*" - ], - [ - "trypsin", - "K|R;[^P].*" - ], - [ - "trypsin_p", - "K|R;.*" - ] - ], - "unify_csv_style_1": [ + "pyprotista_style_1": [ [ "argc", "(?<=R)(?![P])" @@ -5630,6 +5600,36 @@ "(?<=[KR])" ] ], + "tag_graph_style_1": [ + [ + 
"argc", + "R;[^P].*" + ], + [ + "aspn", + ".*;D" + ], + [ + "formic_acid", + "D;[^P].*" + ], + [ + "gluc", + "D|E;[^P].*" + ], + [ + "lysc", + "K;[^P].*" + ], + [ + "trypsin", + "K|R;[^P].*" + ], + [ + "trypsin_p", + "K|R;.*" + ] + ], "ursgal_style_1": [ [ "argc", @@ -5846,7 +5846,7 @@ "msgfplus_style_1": "-ntt", "myrimatch_style_1": "MinTerminiCleavages", "omssa_style_1": "semi_enzyme", - "unify_csv_style_1": "semi_enzyme", + "pyprotista_style_1": "semi_enzyme", "ursgal_style_1": "enzyme_specificity", "xtandem_style_1": "protein, cleavage semi" }, @@ -7069,7 +7069,7 @@ }, { "default_value": null, - "description": "Translate output headers into Ursgal unify_csv style headers\n 'None' : None", + "description": "Translate output headers into Ursgal pyProtista style headers\n 'None' : None", "key_translations": { "comet_style_1": "header_translations", "deepnovo_style_1": "header_translations", @@ -8888,9 +8888,9 @@ }, { "default_value": true, - "description": "X!tandem searches for peptides broken between Asp (D) and Pro (P) for every enzyme. Therefore, it reports peptides that are not enzymatically cleaved. Specify, if those should be kept during unify_csv or removed.", + "description": "X!tandem searches for peptides broken between Asp (D) and Pro (P) for every enzyme. Therefore, it reports peptides that are not enzymatically cleaved. 
Specify, if those should be kept during pyProtista or removed.", "key_translations": { - "unify_csv_style_1": "keep_asp_pro_broken_peps", + "pyprotista_style_1": "keep_asp_pro_broken_peps", "ursgal_style_1": "keep_asp_pro_broken_peps" }, "name": "keep_asp_pro_broken_peps", @@ -9815,7 +9815,7 @@ "omssa_style_1": "-v", "pglyco_db_style_1": "max_miss_cleave", "pipi_style_1": "missed_cleavage", - "unify_csv_style_1": "max_missed_cleavages", + "pyprotista_style_1": "max_missed_cleavages", "upeptide_mapper_style_1": "max_missed_cleavages", "ursgal_style_1": "max_missed_cleavages", "xtandem_style_1": "scoring, maximum missed cleavage sites" @@ -10769,9 +10769,9 @@ "pnovo_style_1": "modifications", "ptminer_style_1": "modifications", "ptmshepherd_style_1": "varmod_masses", + "pyprotista_style_1": "modifications", "pyqms_style_1": "modifications", "tag_graph_style_1": "modifications", - "unify_csv_style_1": "modifications", "ursgal_style_1": "modifications", "xtandem_style_1": [ "residue, modification mass", @@ -12838,7 +12838,7 @@ "omssa_style_1": "-ti", "pepnovo_style_1": "-correct_pm", "ptmshepherd_style_1": "isotope_error", - "unify_csv_style_1": "precursor_isotope_range", + "pyprotista_style_1": "precursor_isotope_range", "ursgal_style_1": "precursor_isotope_range", "xtandem_style_1": "spectrum, parent monoisotopic mass isotope error" }, @@ -13031,7 +13031,7 @@ "msgfplus_style_1": "-t_minus", "novor_style_1": "precursorErrorTol_part2", "omssa_style_1": "-te_part1", - "unify_csv_style_1": "precursor_mass_tolerance_minus", + "pyprotista_style_1": "precursor_mass_tolerance_minus", "ursgal_style_1": "precursor_mass_tolerance_minus", "xtandem_style_1": "spectrum, parent monoisotopic mass error minus" }, @@ -13058,7 +13058,7 @@ "msgfplus_style_1": "-t_plus", "novor_style_1": "precursorErrorTol_part1", "omssa_style_1": "-te_part2", - "unify_csv_style_1": " precursor_mass_tolerance_minus", + "pyprotista_style_1": "precursor_mass_tolerance_plus", "ursgal_style_1": 
"precursor_mass_tolerance_plus", "xtandem_style_1": "spectrum, parent monoisotopic mass error plus" }, @@ -13636,9 +13636,9 @@ "description": "List of column names that are used to define unique PSMs and to merge multiple lines of the same PSM (if specified). The validation_score_field is automatically added to this list. ", "key_translations": { "combine_pep_style_1": "columns_for_grouping", + "pyprotista_style_1": "psm_defining_colnames", "sanitize_csv_style_1": "psm_defining_colnames", "ucontroller_style_1": "psm_defining_colnames", - "unify_csv_style_1": "psm_defining_colnames", "ursgal_style_1": "psm_defining_colnames" }, "name": "psm_defining_colnames", @@ -14228,7 +14228,7 @@ "default_value": 3, "description": "Masses of modifications are rounded in order to match them to their corresponding unimod name. Use this parameter to set the number of decimal places after rounding.", "key_translations": { - "unify_csv_style_1": "rounded_mass_decimals", + "pyprotista_style_1": "rounded_mass_decimals", "ursgal_style_1": "rounded_mass_decimals" }, "name": "rounded_mass_decimals", @@ -14263,10 +14263,10 @@ "description": "name of the pickle that is used to map the retention time", "key_translations": { "mgf_to_rt_lookup_style_1": "rt_pickle_name", + "pyprotista_style_1": "scan_rt_lookup_path", "sugarpy_plot_style_1": "scan_rt_lookup", "sugarpy_run_style_1": "scan_rt_lookup", "ucontroller_style_1": "rt_pickle_name", - "unify_csv_style_1": "scan_rt_lookup_path", "ursgal_style_1": "rt_pickle_name" }, "name": "rt_pickle_name", @@ -14605,7 +14605,7 @@ }, { "default_value": false, - "description": "Search for potential single amino acid polymorphisms. 'True' might cause problems in the downstream processing of th result files (unify_csv, ...)", + "description": "Search for potential single amino acid polymorphisms. 
'True' might cause problems in the downstream processing of the result files (pyProtista, ...)", "key_translations": { "ursgal_style_1": "search_for_saps", "xtandem_style_1": "protein, saps" @@ -15072,7 +15072,7 @@ "default_value": "any", "description": "Determines whether 'all' or 'any' found Sequences Pre/Post AA need to be cleaved correctly", "key_translations": { - "unify_csv_style_1": "terminal_cleavage_site_integrity", + "pyprotista_style_1": "terminal_cleavage_site_integrity", "ursgal_style_1": "terminal_cleavage_site_integrity" }, "name": "terminal_cleavage_site_integrity", @@ -15357,21 +15357,6 @@ "value_translations": {}, "value_type": "float" }, - { - "default_value": "unify_csv_1_0_0", - "description": "unify csv converter version: version name", - "key_translations": { - "ucontroller_style_1": "unify_csv_converter_version", - "ursgal_style_1": "unify_csv_converter_version" - }, - "name": "unify_csv_converter_version", - "tag": [ - "node_versions" - ], - "triggers_rerun": true, - "value_translations": {}, - "value_type": "str" - }, { "default_value": [], "description": "list of additional unimod xml files", @@ -15383,7 +15368,7 @@ "msfragger_style_3": "unimod_xml_file_list", "msgfplus_style_1": "unimod_xml_file_list", "omssa_style_1": "unimod_xml_file_list", - "unify_csv_style_1": "unimod_xml_file_list", + "pyprotista_style_1": "unimod_xml_file_list", "ursgal_style_1": "unimod_xml_file_list", "xtandem_style_1": "unimod_xml_file_list" }, @@ -15525,21 +15510,6 @@ "value_translations": {}, "value_type": "bool" }, - { - "default_value": false, - "description": "Use pyQms for accurate calculation of isotopologue m/z. This will affect the accuracy (ppm) calculation as well. If True, unify_csv will be significantly slower. 
Please note that this does not work for any type of labeling yet.", - "key_translations": { - "unify_csv_style_1": "use_pyqms_for_mz_calculation", - "ursgal_style_1": "use_pyqms_for_mz_calculation" - }, - "name": "use_pyqms_for_mz_calculation", - "tag": [ - "conversion" - ], - "triggers_rerun": true, - "value_translations": {}, - "value_type": "bool" - }, { "default_value": true, "description": "Use filter for low quality spectra.", @@ -16161,12 +16131,12 @@ "percolator_style_1": "validation_score_field", "percolator_style_2": "validation_score_field", "ptminer_style_1": "validation_score_field", + "pyprotista_style_1": "validation_score_field", "q_value_calculator_style_1": "validation_score_field", "qvality_style_1": "validation_score_field", "sanitize_csv_style_1": "validation_score_field", "svm_style_1": "validation_score_field", "ucontroller_style_1": "validation_score_field", - "unify_csv_style_1": "validation_score_field", "ursgal_style_1": "validation_score_field" }, "name": "validation_score_field", @@ -17048,7 +17018,7 @@ "x!tandem:hyperscore" ] ], - "q_value_calculator_style_1": [ + "pyprotista_style_1": [ [ "comet_2020_01_4", "comet:e_value" @@ -17227,15 +17197,15 @@ ], [ "pipi_1_4_5", - "pipi:score" + "PIPI:score" ], [ "pipi_1_4_6", - "pipi:score" + "PIPI:score" ], [ "tag_graph_1_8_0", - "taggraph:: 1-log10_em" + "taggraph:1_log10_em" ], [ "xtandem_alanine", @@ -17262,7 +17232,11 @@ "x!tandem:hyperscore" ] ], - "qvality_style_1": [ + "q_value_calculator_style_1": [ + [ + "comet_2020_01_4", + "comet:e_value" + ], [ "deepnovo_0_0_1", "deepnovo:score" @@ -17331,6 +17305,10 @@ "msamanda_2_0_0_14665", "amanda:score" ], + [ + "msamanda_2_0_0_17442", + "amanda:score" + ], [ "msamanda_2_0_0_9695", "amanda:score" @@ -17431,25 +17409,17 @@ "pglyco_db_2_2_0", "pglyco:total_score" ], - [ - "pglyco_db_2_2_2", - "pglyco:total_score" - ], [ "pipi_1_4_5", - "PIPI:score" + "pipi:score" ], [ "pipi_1_4_6", - "PIPI:score" - ], - [ - "pnovo_3_1_3", - "pnovo:score" + 
"pipi:score" ], [ "tag_graph_1_8_0", - "taggraph:1_log10_em" + "taggraph:: 1-log10_em" ], [ "xtandem_alanine", @@ -17476,7 +17446,7 @@ "x!tandem:hyperscore" ] ], - "sanitize_csv_style_1": [ + "qvality_style_1": [ [ "deepnovo_0_0_1", "deepnovo:score" @@ -17690,7 +17660,7 @@ "x!tandem:hyperscore" ] ], - "svm_style_1": [ + "sanitize_csv_style_1": [ [ "deepnovo_0_0_1", "deepnovo:score" @@ -17904,7 +17874,7 @@ "x!tandem:hyperscore" ] ], - "ucontroller_style_1": [ + "svm_style_1": [ [ "deepnovo_0_0_1", "deepnovo:score" @@ -18093,10 +18063,6 @@ "tag_graph_1_8_0", "taggraph:1_log10_em" ], - [ - "unknown", - "" - ], [ "xtandem_alanine", "x!tandem:hyperscore" @@ -18122,11 +18088,7 @@ "x!tandem:hyperscore" ] ], - "unify_csv_style_1": [ - [ - "comet_2020_01_4", - "comet:e_value" - ], + "ucontroller_style_1": [ [ "deepnovo_0_0_1", "deepnovo:score" @@ -18195,10 +18157,6 @@ "msamanda_2_0_0_14665", "amanda:score" ], - [ - "msamanda_2_0_0_17442", - "amanda:score" - ], [ "msamanda_2_0_0_9695", "amanda:score" @@ -18299,6 +18257,10 @@ "pglyco_db_2_2_0", "pglyco:total_score" ], + [ + "pglyco_db_2_2_2", + "pglyco:total_score" + ], [ "pipi_1_4_5", "PIPI:score" @@ -18307,10 +18269,18 @@ "pipi_1_4_6", "PIPI:score" ], + [ + "pnovo_3_1_3", + "pnovo:score" + ], [ "tag_graph_1_8_0", "taggraph:1_log10_em" ], + [ + "unknown", + "" + ], [ "xtandem_alanine", "x!tandem:hyperscore" @@ -18471,7 +18441,7 @@ "default_value": 6, "description": "word length used to index peptide mapper, smaller word len requires more memory", "key_translations": { - "unify_csv_style_1": "word_len", + "pyprotista_style_1": "word_len", "upeptide_mapper_style_1": "word_len", "ursgal_style_1": "word_len" }, diff --git a/jsons/styles.json b/jsons/styles.json index 6fdf0e4..abbdefd 100644 --- a/jsons/styles.json +++ b/jsons/styles.json @@ -367,14 +367,6 @@ "1.1.2" ] }, - { - "name": "Unify CSV", - "reference": "Kremer, L. P. M., Leufken, J., Oyunchimeg, P., Schulze, S. & Fufezan, C. 
(2016) Ursgal, Universal Python Module Combining Common Bottom-Up Proteomics Tools for Large-Scale Analysis. J. Proteome res. 15, 788-794.", - "style": "unify_csv_style_1", - "versions": [ - "1.0.0" - ] - }, { "name": "UpeptideMapper", "reference": "Kremer, L. P. M., Leufken, J., Oyunchimeg, P., Schulze, S. & Fufezan, C. (2016) Ursgal, Universal Python Module Combining Common Bottom-Up Proteomics Tools for Large-Scale Analysis. J. Proteome res. 15, 788-794.", @@ -671,6 +663,14 @@ "1.0.0" ] }, + { + "name": "pyProtista", + "reference": "Ursgal Team (2022)", + "style": "pyprotista_style_1", + "versions": [ + "1.7.4" + ] + }, { "name": "pyqms", "reference": "Leufken J, Niehues A, Sarin LP, Wessel F, Hippler M, Leidel SA, Fufezan C (2017) pyQms enables universal and accurate quantification of mass spectrometry data",