From dabf5e4c856d174d7484fe6443a18592cdb0d32d Mon Sep 17 00:00:00 2001 From: JSBoejer <142427441+JSBoejer@users.noreply.github.com> Date: Mon, 11 Aug 2025 09:43:28 +0200 Subject: [PATCH 1/2] Input column update motif_assignment.py Correcting input columns for bin_motifs file for motif_assignment. --- nanomotif/mtase_linker/src/motif_assignment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanomotif/mtase_linker/src/motif_assignment.py b/nanomotif/mtase_linker/src/motif_assignment.py index 5b18dd4..5296a4f 100644 --- a/nanomotif/mtase_linker/src/motif_assignment.py +++ b/nanomotif/mtase_linker/src/motif_assignment.py @@ -36,7 +36,7 @@ nanomotif_table['assign_mod_type'] = nanomotif_table['mod_type'].apply(recode_mod_type) #Set motif acceptance threshold as >=0.5 mean_methylation and remove ambiguous motifs in nanomotif_table -mean_methylation = nanomotif_table['n_mod'] / (nanomotif_table['n_mod'] + nanomotif_table['n_nomod']) +mean_methylation = nanomotif_table['n_mod_bin'] / (nanomotif_table['n_mod_bin'] + nanomotif_table['n_nomod_bin']) nanomotif_table_mm50 = nanomotif_table[mean_methylation >= snakemake.params['MINIMUM_METHYLATION']] @@ -268,7 +268,7 @@ MTase_table_assigned.columns = ['bin', 'gene_id', 'contig', 'mod_type_pred', 'sub_type_pred', 'RM_system', 'DF_system_ID', 'motif_type_pred', 'REbase_ID', 'motif_pred', 'linked', 'detected_motif'] MTase_table_assigned_cl = MTase_table_assigned.dropna(subset=['bin']) -nanomotif_table_mm50 = nanomotif_table_mm50[['bin', 'mod_type', 'motif', 'mod_position', 'n_mod', 'n_nomod', 'motif_type', 'motif_complement', 'mod_position_complement', 'n_mod_complement', 'n_nomod_complement', 'linked', 'candidate_genes']] +nanomotif_table_mm50 = nanomotif_table_mm50[['bin', 'mod_type', 'motif', 'mod_position', 'n_mod_bin', 'n_nomod_bin', 'motif_type', 'motif_complement', 'mod_position_complement', 'n_mod_complement', 'n_nomod_complement', 'linked', 'candidate_genes']] #%% MTase_table_assigned_cl.to_csv(snakemake.output['MTase_assignment_table'] , sep='\t', index=False) nanomotif_table_mm50.to_csv(snakemake.output['nanomotif_assignment_table'] , sep='\t', index=False) From 1a8d98bddb9eed6249c1cd3e3ed8d87b759422d4 Mon Sep 17 00:00:00 2001 From: JSBoejer Date: Wed, 12 Nov 2025 12:56:54 +0100 Subject: [PATCH 2/2] column compatibility with new bin-motifs output --- nanomotif/datasets/e_coli_bin-motifs.tsv | 8 ++-- .../mtase_linker/src/motif_assignment.py | 48 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/nanomotif/datasets/e_coli_bin-motifs.tsv b/nanomotif/datasets/e_coli_bin-motifs.tsv index 1676c0e..1c046e4 100644 --- a/nanomotif/datasets/e_coli_bin-motifs.tsv +++ b/nanomotif/datasets/e_coli_bin-motifs.tsv @@ -1,4 +1,4 @@ -bin mod_type motif mod_position n_mod_bin n_nomod_bin motif_type motif_complement mod_position_complement n_mod_complement n_nomod_complement -e_coli_k12 a GATC 1 38207 42 palindrome GATC 1 38207 42 -e_coli_k12 a GCACNNNNNNGTT 2 591 5 bipartite AACNNNNNNGTGC 1 586 10 -e_coli_k12 m CCWGG 1 23336 759 palindrome CCWGG 1 23336 759 +reference motif mod_position mod_type n_mod n_nomod motif_type motif_complement mod_position_complement n_mod_complement n_nomod_complement +e_coli_k12 GATC 1 a 38207 42 palindrome GATC 1 38207 42 +e_coli_k12 GCACNNNNNNGTT 2 a 591 5 bipartite AACNNNNNNGTGC 1 586 10 +e_coli_k12 CCWGG 1 m 23336 759 palindrome CCWGG 1 23336 759 \ No newline at end of file diff --git a/nanomotif/mtase_linker/src/motif_assignment.py b/nanomotif/mtase_linker/src/motif_assignment.py index 5296a4f..da74542 100644 --- a/nanomotif/mtase_linker/src/motif_assignment.py +++ b/nanomotif/mtase_linker/src/motif_assignment.py @@ -36,7 +36,7 @@ nanomotif_table['assign_mod_type'] = nanomotif_table['mod_type'].apply(recode_mod_type) #Set motif acceptance threshold as >=0.5 mean_methylation and remove ambiguous motifs in nanomotif_table -mean_methylation = nanomotif_table['n_mod_bin'] / (nanomotif_table['n_mod_bin'] + nanomotif_table['n_nomod_bin']) +mean_methylation = nanomotif_table['n_mod'] / (nanomotif_table['n_mod'] + nanomotif_table['n_nomod']) nanomotif_table_mm50 = nanomotif_table[mean_methylation >= snakemake.params['MINIMUM_METHYLATION']] @@ -44,13 +44,13 @@ #%% Importing bin_contig table from mmlong2 contig_bin_tsv_path = snakemake.input['contig_bin'] contig_bin_df = pd.read_csv(contig_bin_tsv_path , sep = "\t", header = None) -contig_bin_df.columns = ['contig', 'bin name'] +contig_bin_df.columns = ['contig', 'reference'] #%% Merging dataframes # Merging bin origin information into DefenseFinder based on contig number. -bin_DF_blastp_df = DefenseFinder_blastp_df.merge(contig_bin_df[['bin name', 'contig']], on = 'contig', how = 'left') -bin_DF_blastp_df_filtered = bin_DF_blastp_df.dropna(subset=['bin name']) +bin_DF_blastp_df = DefenseFinder_blastp_df.merge(contig_bin_df[['reference', 'contig']], on = 'contig', how = 'left') +bin_DF_blastp_df_filtered = bin_DF_blastp_df.dropna(subset=['reference']) # Merging modtype table into Defensefinder table MTase_table = bin_DF_blastp_df_filtered.merge(gene_mod_df[['gene_id', 'mod_type']], on = 'gene_id', how = 'left') @@ -101,17 +101,17 @@ MTase_table_assigned.loc[:,'linked'] = False -#Group by 'bin name' and 'mod type' -Cgrouped_MTase = combined_MTase_df.groupby(['bin name', 'mod_type'], dropna=True) -grouped_nanomotif = nanomotif_table_mm50.groupby(['bin', 'assign_mod_type']) +#Group by 'reference' and 'mod type' +Cgrouped_MTase = combined_MTase_df.groupby(['reference', 'mod_type'], dropna=True) +grouped_nanomotif = nanomotif_table_mm50.groupby(['reference', 'assign_mod_type']) #%% Assigment rule system # Step 1: Priority 1 Assignment based on similarity to motif guess -for (bin_name, assign_mod_type), nanomotif_group in grouped_nanomotif: - if (bin_name, assign_mod_type) in Cgrouped_MTase.groups: - mtase_group = Cgrouped_MTase.get_group((bin_name, assign_mod_type)) +for (reference, assign_mod_type), nanomotif_group in grouped_nanomotif: + if (reference, assign_mod_type) in Cgrouped_MTase.groups: + mtase_group = Cgrouped_MTase.get_group((reference, assign_mod_type)) for idx, row in nanomotif_group.iterrows(): # Calculate reverse complement of the motif rev_comp_motif = reverse_complement(row['motif']) @@ -158,17 +158,17 @@ MTase_table_assigned['detected_motif'] = MTase_table_assigned['detected_motif'].replace('nan', np.nan) #%% #%% Re-group to exclude assigned entries -grouped_nanomotif = nanomotif_table_mm50[nanomotif_table_mm50['linked'] != True].groupby(['bin', 'assign_mod_type']) +grouped_nanomotif = nanomotif_table_mm50[nanomotif_table_mm50['linked'] != True].groupby(['reference', 'assign_mod_type']) idx_MTase_table_assigned = MTase_table_assigned[MTase_table_assigned['linked'] != True].index #Extract indicies of non assigned entries #%% combined_MTase_df = combined_MTase_df.loc[idx_MTase_table_assigned] #FIlter combined_MTase based on extracted indicies -Cgrouped_MTase = combined_MTase_df.groupby(['bin name', 'mod_type'], dropna=True) +Cgrouped_MTase = combined_MTase_df.groupby(['reference', 'mod_type'], dropna=True) #%% # Step 2: Priority 2 Assignment: based single subtypes in group -for (bin_name, assign_mod_type), nanomotif_group in grouped_nanomotif: - if (bin_name, assign_mod_type) in Cgrouped_MTase.groups: - mtase_group = Cgrouped_MTase.get_group((bin_name, assign_mod_type)) +for (reference, assign_mod_type), nanomotif_group in grouped_nanomotif: + if (reference, assign_mod_type) in Cgrouped_MTase.groups: + mtase_group = Cgrouped_MTase.get_group((reference, assign_mod_type)) # Count unique motif types in nanomotif and MTase table for idx, row in nanomotif_group.iterrows(): @@ -209,15 +209,15 @@ MTase_table_assigned['detected_motif'] = MTase_table_assigned['detected_motif'].replace('nan', np.nan) #%% Re-group to exclude assigned entries -grouped_nanomotif = nanomotif_table_mm50[nanomotif_table_mm50['linked'] != True].groupby(['bin', 'assign_mod_type']) +grouped_nanomotif = nanomotif_table_mm50[nanomotif_table_mm50['linked'] != True].groupby(['reference', 'assign_mod_type']) idx_MTase_table_assigned = MTase_table_assigned[MTase_table_assigned['linked'] != True].index #Extract indicies of non assigned entries combined_MTase_df = combined_MTase_df.loc[idx_MTase_table_assigned] #Filter combined_MTase based on extracted indicies -Cgrouped_MTase = combined_MTase_df.groupby(['bin name', 'mod_type'], dropna=True) +Cgrouped_MTase = combined_MTase_df.groupby(['reference', 'mod_type'], dropna=True) #%% Step 3: Assign candidate genes -for (bin_name, assign_mod_type), nanomotif_group in grouped_nanomotif: - if (bin_name, assign_mod_type) in Cgrouped_MTase.groups: - mtase_group = Cgrouped_MTase.get_group((bin_name, assign_mod_type)) +for (reference, assign_mod_type), nanomotif_group in grouped_nanomotif: + if (reference, assign_mod_type) in Cgrouped_MTase.groups: + mtase_group = Cgrouped_MTase.get_group((reference, assign_mod_type)) # Count unique motif types in nanomotif and MTase table for idx, row in nanomotif_group.iterrows(): @@ -264,11 +264,11 @@ # %% -MTase_table_assigned = MTase_table_assigned[['bin name', 'gene_id', 'contig', 'mod_type', 'sub_type', 'RM_system', 'sys_id', 'motif_type', 'REbase_ID', 'motif_guess', 'linked', 'detected_motif']] -MTase_table_assigned.columns = ['bin', 'gene_id', 'contig', 'mod_type_pred', 'sub_type_pred', 'RM_system', 'DF_system_ID', 'motif_type_pred', 'REbase_ID', 'motif_pred', 'linked', 'detected_motif'] -MTase_table_assigned_cl = MTase_table_assigned.dropna(subset=['bin']) +MTase_table_assigned = MTase_table_assigned[['reference', 'gene_id', 'contig', 'mod_type', 'sub_type', 'RM_system', 'sys_id', 'motif_type', 'REbase_ID', 'motif_guess', 'linked', 'detected_motif']] +MTase_table_assigned.columns = ['reference', 'gene_id', 'contig', 'mod_type_pred', 'sub_type_pred', 'RM_system', 'DF_system_ID', 'motif_type_pred', 'REbase_ID', 'motif_pred', 'linked', 'detected_motif'] +MTase_table_assigned_cl = MTase_table_assigned.dropna(subset=['reference']) -nanomotif_table_mm50 = nanomotif_table_mm50[['bin', 'mod_type', 'motif', 'mod_position', 'n_mod_bin', 'n_nomod_bin', 'motif_type', 'motif_complement', 'mod_position_complement', 'n_mod_complement', 'n_nomod_complement', 'linked', 'candidate_genes']] +nanomotif_table_mm50 = nanomotif_table_mm50[['reference', 'motif', 'mod_position', 'mod_type', 'n_mod', 'n_nomod', 'motif_type', 'motif_complement', 'mod_position_complement', 'n_mod_complement', 'n_nomod_complement', 'linked', 'candidate_genes']] #%% MTase_table_assigned_cl.to_csv(snakemake.output['MTase_assignment_table'] , sep='\t', index=False) nanomotif_table_mm50.to_csv(snakemake.output['nanomotif_assignment_table'] , sep='\t', index=False)