-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathtag_translator.py
More file actions
executable file
·104 lines (84 loc) · 2.38 KB
/
Copy pathtag_translator.py
File metadata and controls
executable file
·104 lines (84 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
# tag_translator.py
# Using Python 3.4.3
#
# PURPOSE: Convert between tagsets defined in spreadsheet file(s)
import sys
import string
import argparse
# Module-level settings read by main().  The values below are placeholders
# only: the __main__ block at the bottom of the file rebinds them from the
# parsed command-line arguments before main() is called.
TAG_DICT = INFILE = OUTFILE = 0   # replaced by open file objects
POSCOL = DELIMITER = 0            # replaced by the column index / delimiter
VERBOSE = False                   # set to True by -v / --verbose
def main(argc, argv):
    """Re-tag the corpus in INFILE using the tag mapping in TAG_DICT.

    Works entirely on the module-level settings TAG_DICT, INFILE, OUTFILE,
    DELIMITER, POSCOL and VERBOSE, which must be set before the call.

    Every line of TAG_DICT is split on DELIMITER; the first field is the old
    tag and the second field is its replacement.  Every line of INFILE is
    split on DELIMITER, the field at index POSCOL is replaced by the mapped
    tag (or by an empty string when the tag has no mapping), and the row is
    written to OUTFILE joined with tabs.

    Args:
        argc: Unused; kept so existing callers keep working.
        argv: Unused; kept so existing callers keep working.
    """
    tags = {}
    # Read in tag mapping
    for line in TAG_DICT:
        mapping = line.split(DELIMITER)
        if len(mapping) < 2:
            # Blank or delimiter-less line: nothing to map, so skip it
            # instead of failing on the missing second field.
            continue
        # Strip the key too: lookups below use the stripped corpus tag, so an
        # unstripped key with stray whitespace could never match.
        tags[mapping[0].strip()] = mapping[1].strip()

    if VERBOSE:
        print("Tags: {}".format(tags))
        print()

    for line in INFILE:
        row = line.split(DELIMITER)
        try:
            old_tag = row[POSCOL].strip()
        except IndexError:
            # Row has no POS column (e.g. a blank separator line): pass it
            # through unchanged rather than aborting the whole run.
            pass
        else:
            # Tags without a mapping are replaced by an empty string.
            row[POSCOL] = tags.get(old_tag, "")
        # Drop the trailing newline (and surrounding whitespace) of the last
        # field; the line terminator is written explicitly below.
        row[-1] = row[-1].strip()
        # Output is always tab-separated, whatever the input delimiter was.
        OUTFILE.write("\t".join(row))
        OUTFILE.write("\n")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, publish them through the
    # module-level settings that main() reads, then run the translation.
    parser = argparse.ArgumentParser(
        description=("Convert between tagsets defined in spreadsheet file(s)"))

    # Optional arguments
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="verbose flag (Default: False)")
    parser.add_argument(
        "-d", "--delimiter",
        type=str,
        default="\t",
        help="delimiter for input file (Default: tab)")
    parser.add_argument(
        "-c", "--column",
        metavar="n",
        type=int,
        default=1,
        help=("pos-tag column in input file "
              "(Default: 1)"))

    # Positional arguments
    parser.add_argument(
        "tag_dict",
        type=argparse.FileType("r"),
        help="Tag Mapping file")
    parser.add_argument(
        "infile",
        nargs="?",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="corpus file (Default: stdin)")
    parser.add_argument(
        "outfile",
        nargs="?",
        type=argparse.FileType("w"),
        default=sys.stdout,
        help="re-tagged file (Default: stdout)")

    args = parser.parse_args()

    # This code runs at module level, so these assignments rebind the
    # module-level settings that main() reads.
    VERBOSE = args.verbose
    DELIMITER = args.delimiter
    INFILE = args.infile
    TAG_DICT = args.tag_dict
    POSCOL = args.column
    OUTFILE = args.outfile

    try:
        main(len(sys.argv), sys.argv)
    finally:
        # Close every file we opened, including the mapping file, even when
        # main() raises.  The standard streams are left open.
        for stream in (args.tag_dict, args.infile, args.outfile):
            if stream not in (sys.stdin, sys.stdout):
                stream.close()