Skip to content

Commit 930deb4

Browse files
committed
fix
1 parent e071cb3 commit 930deb4

1 file changed

Lines changed: 15 additions & 5 deletions

File tree

ogs_merge/ogs_merge

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -484,17 +484,24 @@ class OgsMerger():
484484

485485
base_gff_in = open(self.filtered_base_gff, 'r')
486486
base_gff_out = open(self.tmpdir + '/base_cds.gff', 'w+')
487+
488+
if not self.regex_rna:
489+
rna_reg = r'Parent=([a-zA-Z0-9]+)([\.0-9]+)?([-_]R[A-Z]+)?(,[a-zA-Z0-9\.\-_]*)?'
490+
else:
491+
rna_reg = r'Parent=' + self.regex_rna
492+
493+
if not self.regex_rna_replace:
494+
rna_reg_rep = r'ID=\1'
495+
else:
496+
rna_reg_rep = r'ID=' + self.regex_rna_replace
497+
487498
for li in base_gff_in:
488499
cols = li.strip().split()
489500
# FIXME CDS could be more appropriate (or maybe not...)
490501
if not li.startswith("#") and cols[2] == 'exon':
491502
cols[8] = re.sub(r'ID=([a-zA-Z0-9]+)', r'exID=\1', cols[8]) # remove already set id
492503
# Generate a fake id based on Parent + remove multiple parents (ie when an exon is part of multiple isoforms)
493-
if not self.regex_rna:
494-
rna_reg = r'Parent=([a-zA-Z0-9]+)([\.0-9]+)?([-_]R[A-Z]+)?(,[a-zA-Z0-9\.\-_]*)?'
495-
else:
496-
rna_reg = r'Parent=' + self.regex_rna
497-
cols[8] = re.sub(rna_reg, r'ID=\1', cols[8])
504+
cols[8] = re.sub(rna_reg, rna_reg_rep, cols[8])
498505
cols[8] = cols[8].rstrip(";") # gff2bed doesn't like trailing ;
499506
print('\t'.join(cols), file=base_gff_out)
500507
base_gff_out.close()
@@ -971,6 +978,8 @@ class OgsMerger():
971978
parser.add_argument("-d", "--deleted", help="File containing a list of mRNAs to remove")
972979
parser.add_argument("-o", "--out_prefix", help="Prefix for output files (default=<ogs_name>_<today's date>)")
973980
parser.add_argument("--regex_rna", help="Regex matching mRNA ids, with a capturing group around the gene id without version suffix (default='([a-zA-Z0-9]+)([\\.0-9]+)?([-_]R[A-Z]+)?(,[a-zA-Z0-9\\.\\-_]*)?' )")
981+
parser.add_argument("--regex_rna_replace", help="Replacement string to create a gene id from regex_rna captured group, where {id} is the captured group (default='{id}' )")
982+
974983
args = parser.parse_args()
975984

976985
self.base_gff = args.base_gff
@@ -984,6 +993,7 @@ class OgsMerger():
984993
self.id_regex = args.id_regex
985994
self.id_syntax = args.id_syntax
986995
self.regex_rna = args.regex_rna
996+
self.regex_rna_replace = args.regex_rna_replace
987997

988998
self.out_prefix = args.out_prefix
989999
if not self.out_prefix:

0 commit comments

Comments
 (0)