-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
122 lines (105 loc) · 5.79 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
<!doctype html>
<html lang="en-US">
<head>
<title>pyconll</title>
<meta charset="utf-8">
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/css/bootstrap.min.css" integrity="sha384-Smlep5jCw/wG7hdkwQ/Z5nLIefveQRIY9nfy6xoR1uRYBtpZgI6339F5dgvm/e9B" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto+Mono|Source+Sans+Pro|Open+Sans:600">
<link rel="stylesheet" href="index.css">
<link rel="shortcut icon" href="icon.png">
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.3/umd/popper.min.js" integrity="sha384-ZMP7rVo3mIykV+2+9J3UJ46jBk0WLaUAdn689aCwoqbBJiSnjAK/l8WvCWPIPm49" crossorigin="anonymous"></script>
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.2/js/bootstrap.min.js" integrity="sha384-o+RDsa0aLu++PJvFqy8fFScvbHFLtbvScb8AjopnFD+iEQ7wo/CG0xlczd+2O/em" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script>
<script src="https://use.fontawesome.com/55a678ded5.js"></script>
<script src="index.js"></script>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-84NYKPRZ9V"></script>
<script>
// Reuse an existing dataLayer queue if the page already defined one; otherwise start with an empty array.
window.dataLayer = window.dataLayer || [];
// Queue a command by pushing this call's `arguments` object, as-is, onto dataLayer.
function gtag(){dataLayer.push(arguments);}
// Queue the 'js' command together with the current date/time.
gtag('js', new Date());
// Queue the 'config' command for the same ID that is passed in the gtag.js script URL.
gtag('config', 'G-84NYKPRZ9V');
</script>
</head>
<body>
<div class="container">
<div class="row">
<div class="col">
<h1 class="title">pyconll</h1>
</div>
</div>
<div class="row small-space">
<div class="col">
<h3 class="subtitle">A minimal, all-Python, no-dependency library for parsing CoNLL files.</h3>
</div>
</div>
<div class="row">
<div class="col">
<p class="promo">
<strong>pyconll</strong> provides a basic, low-level wrapper around the CoNLL annotation scheme.
pyconll's sensible API allows for easy automation and little ramp-up time, and it works as a great
building block for creating larger CL systems.
</p>
</div>
</div>
<div class="row extra-space">
<div class="col">
<h4>Code Sample</h4>
<pre><code class="python code-sample">
# Make sure 'Lord of the Rings' has 'Lord' and 'Rings' annotated as regular NOUN.
import pyconll
import pyconll.util

UD_ENGLISH_TRAIN = './ud/train.conll'
NGRAM = 'Lord of the Rings'.split()

train = pyconll.load_from_file(UD_ENGLISH_TRAIN)

# util#find_ngrams provides an iterator of tuples where the first element is the
# sentence where the ngram was found, the second element is the numeric index of
# the ngram occurrence in the sentence, and the third element is a list of the
# tokens that make up the match.
for sentence, i, tokens in pyconll.util.find_ngrams(train, NGRAM):
    tokens[0].upos = 'NOUN'
    tokens[-1].upos = 'NOUN'

# After making sure these occurrences are properly handled, go through the
# corpus and determine how many unique proper nouns are annotated.
proper_nouns = set()
for sentence in train:
    for token in sentence:
        if token.upos == 'PROPN':
            proper_nouns.add(token.lemma)

# Print out how many unique proper nouns there are in the corpus.
print(len(proper_nouns))

# Write out the transformed version of the corpus. The argument to write should
# be a Writeable (i.e. have a write method). In this case we want to make sure
# we write to a utf-8 file.
with open('./ud/transformed.conll', 'w', encoding='utf-8') as f:
    train.write(f)
</code></pre>
</div>
</div>
<div class="row extra-space">
<div class="col">
<h4>Install</h4>
<pre><code class="bash code-sample">
pip install pyconll
</code></pre>
</div>
</div>
<div class="row">
<div class="col">
<p>Want to find out more?</p>
<ul>
<li>
<i class="fa fa-github list-item-icon" aria-hidden="true"></i>
<a href="https://github.com/pyconll/pyconll">GitHub</a>
</li>
<li>
<i class="fa fa-book list-item-icon" aria-hidden="true"></i>
<a href="https://pyconll.readthedocs.io/">Documentation</a>
</li>
</ul>
</div>
</div>
</div>
</body>
</html>