diff --git a/answer b/answer
index c97b49d..f0de860 100755
--- a/answer
+++ b/answer
@@ -14,10 +14,12 @@ import itertools
import nltk
from nltk.stem import PorterStemmer
import bs4
# Import our modules from /modules
import questionClassifier
import sourceContentSelector
+import coref
# To answer yes/no question, we want to just answer yes or no,
# and not returna whole sentence. We do this by checking for
@@ -25,37 +27,6 @@ import sourceContentSelector
def contains_negative(sent):
return "no" in sent or "not" in sent or "n't" in sent
-# the set of pronouns, used for anaphora resolution
-pronouns = set(["he", "she", "it", "its", "it's", "him", "her", "his","they",
- "their","we", "our","i","you","your","my","mine","yours","ours"])
-resolved_articles = {}
-# Runs coreference resolution on the article using arkref.
-# This still needs to be implemented.
-def coref(path_to_article):
- if path_to_article in resolved_articles:
- return resolved_articles[path_to_article]
- subprocess.call(["./arkref.sh", "-input", path_to_article])
- tagged_article = open(path_to_article.replace("txt", "tagged")).read()
- tagged_article = ""+tagged_article+"" # trick arkref into doing entire doc
- soup = bs4.BeautifulSoup(tagged_article, "html.parser").root
- for entity in soup.find_all(True):
- if entity.string != None and entity.string.strip().lower() in pronouns:
- antecedent_id = entity["entityid"].split("_")[0]
- antecedent = soup.find(mentionid=antecedent_id)
- antecedent = str(antecedent).split(">", 1)[1].split("<", 1)[0]
- #string = re.sub('<.*?>',' ',str(antecedent))
- #tok = nltk.word_tokenize(string)
- #ants = [(x,y) for x,y in nltk.pos_tag(tok) if y in {'NNP','NN'}]
- entity.string.replace_with(antecedent)
- #print 'entity is: '+entity.string
- resolved = re.sub("<.*?>", "", str(soup))
- resolved_articles[path_to_article] = resolved
- return resolved
# Answers a question from the information in article.
# Ranks all the sentences and then returns the top choice.
def answer(question, article):
@@ -95,6 +66,6 @@ for year in ("S08", "S09", "S10"):
print "Difficulty from answerer:", difficulty_answerer
print "Difficulty from questioner:", difficulty_questioner
- article = coref(path_to_article)
+ article = coref.process(path_to_article)
print "Our answer:", answer(question, article)
print "Correct answer:", correct_answer
diff --git a/modules/coref.py b/modules/coref.py
new file mode 100644
index 0000000..73db59d
--- /dev/null
+++ b/modules/coref.py
@@ -0,0 +1,43 @@
+# coref.py
+# Useful tools which should be pre-installed
+import os, sys, errno
+import subprocess
+import re
+import itertools
+import nltk
+from nltk.stem import PorterStemmer
+import bs4
+# the set of pronouns, used for anaphora resolution
+pronouns = set(["he", "she", "it", "its", "it's", "him", "her", "his","they",
+ "their","we", "our","i","you","your","my","mine","yours","ours"])
+resolved_articles = {}
+# Runs coreference resolution on the article using arkref.
+# Invokes ./arkref.sh on path_to_article, reads the ".tagged" file it leaves
+# next to the input, and returns the article text with every pronoun mention
+# replaced by the leading text of its antecedent mention and all markup
+# stripped. Results are cached per path in resolved_articles, so arkref runs
+# at most once per article.
+def process(path_to_article):
+    if path_to_article in resolved_articles:
+        return resolved_articles[path_to_article]
+    subprocess.call(["./arkref.sh", "-input", path_to_article])
+    # Swap only the file extension; a plain replace("txt", "tagged") would
+    # also rewrite any "txt" occurring in a directory or base file name.
+    tagged_path = os.path.splitext(path_to_article)[0] + ".tagged"
+    with open(tagged_path) as tagged_file:
+        tagged_article = tagged_file.read()
+    # Wrap the whole document in one <root> element so that the .root lookup
+    # below returns a tag covering every mention instead of None.
+    tagged_article = "<root>" + tagged_article + "</root>"
+    soup = bs4.BeautifulSoup(tagged_article, "html.parser").root
+    for entity in soup.find_all(True):
+        text = entity.string
+        # Only tags whose sole content is a pronoun are rewritten.
+        if text is None or text.strip().lower() not in pronouns:
+            continue
+        entity_id = entity.get("entityid")
+        if entity_id is None:
+            continue  # tag carries no coreference information
+        # The part of entityid before the first "_" is looked up as the
+        # mentionid of the antecedent.
+        antecedent_id = entity_id.split("_")[0]
+        antecedent = soup.find(mentionid=antecedent_id)
+        if antecedent is None:
+            continue  # no matching mention; leave the pronoun as written
+        # Keep just the text between the antecedent's opening tag and the
+        # next tag, i.e. its leading text without nested markup.
+        leading_text = str(antecedent).split(">", 1)[1].split("<", 1)[0]
+        text.replace_with(leading_text)
+    resolved = re.sub("<.*?>", "", str(soup))
+    resolved_articles[path_to_article] = resolved
+    return resolved
\ No newline at end of file