Skip to content

Commit

Permalink
Stanford Coref
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Sedra committed Mar 30, 2013
1 parent 4db2572 commit 3d3b3c8
Show file tree
Hide file tree
Showing 864 changed files with 319,333 additions and 0 deletions.
28 changes: 28 additions & 0 deletions modules/sourceContentSelector.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,31 @@ def getRelevantSentences(keywords, article):
score += 1
relevant.append((sent, score))
return relevant

def _wrapped_ngrams(tokens, n):
    """Return the set of n-token tuples ending at each position of `tokens`.

    Indices wrap around, so the n-grams ending near the start of the list
    borrow their leading tokens from the end of the list. An empty token
    list yields an empty set.
    """
    size = len(tokens)
    # Modular indexing keeps the wrap-around valid even when the list is
    # shorter than n (a plain negative index such as tokens[-2] raises
    # IndexError for a single-token list).
    return {
        tuple(tokens[(end - back) % size] for back in range(n - 1, -1, -1))
        for end in range(size)
    }


# Compare two sentences using n-gram overlap (up to trigrams).
def ngramWeight(question,sentence):
    """Score the stemmed n-gram overlap between `question` and `sentence`.

    Both strings are tokenized with ``nltk.word_tokenize`` and every token
    is passed through ``ps.stem``. The result is a weighted count of the
    distinct unigrams, bigrams and trigrams present in both inputs:

        0.2 * shared unigrams + 0.3 * shared bigrams + 0.5 * shared trigrams

    Bigrams and trigrams roll around the end of the sentence, i.e. the
    last token(s) are treated as preceding the first token.

    Returns:
        The weighted overlap score as a float (0.0 when nothing is shared).
    """
    # Build real lists: the n-gram code indexes into them, and map() returns
    # a non-subscriptable iterator on Python 3.
    uniQ = [ps.stem(token) for token in nltk.word_tokenize(question)]
    uniS = [ps.stem(token) for token in nltk.word_tokenize(sentence)]
    unigram = set(uniQ).intersection(uniS)

    # N-grams are kept as tuples rather than concatenated strings so that
    # different token sequences can never collide ('ab'+'c' vs 'a'+'bc').
    bigram = _wrapped_ngrams(uniQ, 2).intersection(_wrapped_ngrams(uniS, 2))
    trigram = _wrapped_ngrams(uniQ, 3).intersection(_wrapped_ngrams(uniS, 3))

    # Longer matches are stronger evidence, so they carry more weight.
    lam1 = 0.2
    lam2 = 0.3
    lam3 = 0.5

    return lam1*len(unigram) + lam2*len(bigram) + lam3*len(trigram)

if __name__ == '__main__':
    # Manual smoke run of ngramWeight on a fixed sentence pair; the
    # returned score is not used.
    sample_question = 'I like dolphin now'
    sample_sentence = 'Sam also likes dolphins now'
    ngramWeight(sample_question, sample_sentence)



156 changes: 156 additions & 0 deletions stanford-corenlp-full-2012-11-12/CoreNLP-to-HTML.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<?xml version="1.0" encoding="UTF-8"?>

<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">

<xsl:output method="html"/>

<!--
  Entry point. Matches the document root and emits the complete HTML page:
  a heading, then a single table with one row per
  root/document/sentences/sentence, followed by a row holding the
  coreference output.
-->
<xsl:template match="/">
<html>
<body>
<center><h2>Stanford CoreNLP XML Output</h2></center>
<hr size="3" color="#333333"/>
<center><h3>Document</h3></center>
<table border="1" style="background-color:#f0f0f0;" align="center">
<tr><th>Sentences</th></tr>
<xsl:for-each select="root/document/sentences/sentence">
<!-- position() is the 1-based index of the current sentence within this
     for-each; it is handed to the sentence template, which prints it as
     the sentence number. -->
<tr><td>
<xsl:apply-templates select=".">
<xsl:with-param name="position" select="position()"/>
</xsl:apply-templates>
</td></tr>
</xsl:for-each>

<tr><th>Coreference resolution graph</th></tr>
<tr><td>
<!-- Rendered by the template matching "coreference". -->
<xsl:apply-templates select="root/document/coreference"/>
</td></tr>
</table>

</body>
</html>
</xsl:template>

<!--
  Renders one sentence: a "Sentence #N" label (N comes from the "position"
  parameter, defaulting to the string '0' when the caller passes nothing),
  the token table, the parse tree text, and three bullet lists with the
  basic, collapsed and CC-processed collapsed dependencies.
-->
<xsl:template match="root/document/sentences/sentence">
<xsl:param name="position" select="'0'"/>
<i><b>Sentence #<xsl:value-of select="$position"/></b></i>

<p>
<i>Tokens</i><br/>
<xsl:apply-templates select="tokens"/>
</p>

<p>
<i>Parse tree</i><br/>
<xsl:value-of select="parse"/>
</p>

<!-- Each list selects the dep elements directly through their container,
     so every dep is rendered by the "dep" template in document order. -->
<p>
<i>Uncollapsed dependencies</i>
<ul>
<xsl:apply-templates select="basic-dependencies/dep"/>
</ul>
</p>

<p>
<i>Collapsed dependencies</i>
<ul>
<xsl:apply-templates select="collapsed-dependencies/dep"/>
</ul>
</p>

<p>
<i>Collapsed dependencies with CC processed</i>
<ul>
<xsl:apply-templates select="collapsed-ccprocessed-dependencies/dep"/>
</ul>
</p>
</xsl:template>

<!--
  Renders a tokens element as an HTML table: a header row followed by one
  row per token child. The Id column is read from the token's id attribute;
  every other column is read from the child element selected below.
-->
<xsl:template match="tokens">
<table border="1">
<tr>
<th>Id</th>
<th>Word</th>
<th>Lemma</th>
<th>Char begin</th>
<th>Char end</th>
<th>POS</th>
<th>NER</th>
<th>Normalized NER</th>
</tr>
<xsl:for-each select="token">
<!-- Cell order must stay aligned with the header row above. A token
     without one of these child elements simply yields an empty cell. -->
<tr>
<td><xsl:value-of select="@id"/></td>
<td><xsl:value-of select="word"/></td>
<td><xsl:value-of select="lemma"/></td>
<td><xsl:value-of select="CharacterOffsetBegin"/></td>
<td><xsl:value-of select="CharacterOffsetEnd"/></td>
<td><xsl:value-of select="POS"/></td>
<td><xsl:value-of select="NER"/></td>
<td><xsl:value-of select="NormalizedNER"/></td>
</tr>
</xsl:for-each>
</table>
</xsl:template>

<!--
  Renders a basic-dependencies element as a bullet list with one item per
  dep child (each item is produced by the "dep" template).
  NOTE(review): nothing in this stylesheet applies templates to
  basic-dependencies itself (the sentence template selects the dep
  children directly), so this rule looks unused here; confirm before
  relying on it.
-->
<xsl:template match="basic-dependencies">
<ul>
<xsl:apply-templates select="dep"/>
</ul>
</xsl:template>

<!--
  Renders a collapsed-dependencies element as a bullet list with one item
  per dep child (each item is produced by the "dep" template).
  NOTE(review): nothing in this stylesheet applies templates to
  collapsed-dependencies itself (the sentence template selects the dep
  children directly), so this rule looks unused here; confirm before
  relying on it.
-->
<xsl:template match="collapsed-dependencies">
<ul>
<xsl:apply-templates select="dep"/>
</ul>
</xsl:template>

<!--
  Renders a collapsed-ccprocessed-dependencies element as a bullet list
  with one item per dep child (each item is produced by the "dep"
  template).
  NOTE(review): nothing in this stylesheet applies templates to
  collapsed-ccprocessed-dependencies itself (the sentence template selects
  the dep children directly), so this rule looks unused here; confirm
  before relying on it.
-->
<xsl:template match="collapsed-ccprocessed-dependencies">
<ul>
<xsl:apply-templates select="dep"/>
</ul>
</xsl:template>

<!--
  Renders one dep element as a list item of the form
  type ( governor-idx , dependent-idx ), where type is the dep's type
  attribute and idx is the idx attribute of the governor / dependent child.
  The literal "(", "," and ")" below are non-blank text nodes, so the line
  breaks around them are copied to the output as well; keep that in mind
  before reformatting these lines.
-->
<xsl:template match="dep">
<li>
<xsl:value-of select="@type"/>
(
<xsl:value-of select="governor"/>-<xsl:value-of select="governor/@idx"/>
,
<xsl:value-of select="dependent"/>-<xsl:value-of select="dependent/@idx"/>
)
</li>
</xsl:template>

<!--
  Renders the coreference section as a numbered list. The root template
  applies this rule to root/document/coreference; every coreference child
  of that element becomes one numbered item containing a bullet list of
  its mention children (sentence value and head value).
-->
<xsl:template match="coreference">
<ol>
<xsl:for-each select="coreference">
<li>
<ul>
<xsl:for-each select="mention">
<li> sentence <xsl:value-of select="sentence"/>,
headword <xsl:value-of select="head"/>
<!-- Only the presence of the representative attribute is tested, not
     its value; such mentions are tagged with "(gov)". -->
<xsl:if test="@representative"> (gov) </xsl:if>
</li>
</xsl:for-each>
</ul>
</li>
</xsl:for-each>
</ol>
</xsl:template>

</xsl:stylesheet>
20 changes: 20 additions & 0 deletions stanford-corenlp-full-2012-11-12/LIBRARY-LICENSES
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
xom-1.2.6.jar

Url: http://www.xom.nu/

License: http://www.xom.nu/license.xhtml

LGPL
GNU Lesser General Public License

-----------------------------------------

joda-time-2.0.jar

Url: http://joda-time.sourceforge.net/

License: http://joda-time.sourceforge.net/license.html

Apache License 2.0

-----------------------------------------
Loading

0 comments on commit 3d3b3c8

Please sign in to comment.