Skip to content

Commit

Permalink
testing coref
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Sedra committed Apr 14, 2013
1 parent ac57bae commit 6364d81
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 28 deletions.
61 changes: 33 additions & 28 deletions answer
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,23 @@ pronouns = set(["he", "she", "it", "him", "her", "his","they","their","we",
# This still needs to be implemented.
# NOTE(review): this listing is a rendered diff hunk, not runnable source:
# indentation has been stripped, and pre-change and post-change lines are shown
# together without +/- markers. Confirm against the repository before editing.
# The print statements and the tuple-parameter lambda are Python 2 syntax.
#
# coref: runs the external ./arkref.sh script on the article, reads the
# matching ".tagged" file, and substitutes pronoun mentions with the nouns of
# the mention they refer to. The substituted text is only printed; the
# function returns the original article text.
def coref(path_to_article):
# Presumably arkref.sh produces the ".tagged" file read below -- confirm.
subprocess.call(["./arkref.sh", "-input", path_to_article])
print open(path_to_article).read()
# str.replace swaps every "txt" in the path, not only the file extension.
tagged_article = open(path_to_article.replace("txt", "tagged")).read()
# The next four lines look like the old and new versions of the same two
# statements; if both wrapping lines ran, <root> would be added twice.
tagged_article = "<root>"+tagged_article+"</root>"
print tagged_article
tagged_article = "<root>"+tagged_article+"</root>" # trick arkref into doing entire doc
#print tagged_article
# Parse the wrapped markup and work on the <root> element.
soup = bs4.BeautifulSoup(tagged_article, "html.parser").root
# find_all(True) visits every tag under <root>.
for entity in soup.find_all(True):
# Only tags whose text, stripped and lower-cased, is in the `pronouns` set.
if entity.string != None and entity.string.strip().lower() in pronouns:
# The antecedent's mention id is the part of entityid before the first "_".
antecedent_id = entity["entityid"].split("_")[0]
# NOTE(review): presumably find() yields None when no tag carries this
# mentionid; nothing below handles that case -- confirm.
antecedent = soup.find(mentionid=antecedent_id)
#entity.string.replace_with(antecedent)
print antecedent
# Drop markup from the antecedent, then keep only tokens tagged NNP or NN.
string = re.sub('<.*?>',' ',str(antecedent))
tok = nltk.word_tokenize(string)
ants = [(x,y) for x,y in nltk.pos_tag(tok) if y in {'NNP','NN'}]
# Replace the pronoun text with those nouns joined by single spaces.
# The tuple-parameter lambda is Python 2-only syntax.
entity.string.replace_with(' '.join(map(lambda (x,y):x,ants)))
#print 'entity is: '+entity.string
#entity.unwrap()
# Strip all remaining tags from the tree and print the resolved text.
string2 = re.sub('<.*?>',' ',str(soup))
print string2

# Returns the unmodified article file contents, not string2.
return open(path_to_article).read()

Expand All @@ -65,30 +71,29 @@ def answer(question, article):
# NOTE(review): rendered diff hunk with indentation stripped. The live dataset
# loop and its commented-out copy below are presumably the pre-commit and
# post-commit versions of the same lines -- confirm against the repository.
if __name__ == '__main__':
# Article title to filter on, taken from the first command-line argument.
article_name = sys.argv[1]

# Walk the question/answer listing of each dataset year.
for year in ("S08", "S09", "S10"):
print "Year:", year
prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
# Discard the first line (presumably a column header -- confirm).
question_answer_pairs.pop(0)
for line in question_answer_pairs:
# Only rows that begin with the requested article title are processed.
if not line.startswith(article_name): continue
# NOTE(review): str.lstrip treats its argument as a set of characters, not
# a prefix, so this can also strip leading characters of the question.
line = line.lstrip(article_name)
# The question is everything up to and including the first "?"; rows
# without a "?" are skipped.
end = line.find("?")
if end == -1: continue
question = line[:end+1].strip()
# The remaining whitespace-separated fields are consumed from the end:
# article file name, then two difficulty values; what is left is the answer.
line = line[end+1:].split()
path_to_article = prefix+line.pop()+".txt"
difficulty_answerer = line.pop()
difficulty_questioner = line.pop()
correct_answer = " ".join(line)
# for year in ("S08", "S09", "S10"):
# print "Year:", year
# prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
# question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
# question_answer_pairs.pop(0)
# for line in question_answer_pairs:
# if not line.startswith(article_name): continue
# line = line.lstrip(article_name)
# end = line.find("?")
# if end == -1: continue
# question = line[:end+1].strip()
# line = line[end+1:].split()
# path_to_article = prefix+line.pop()+".txt"
# difficulty_answerer = line.pop()
# difficulty_questioner = line.pop()
# correct_answer = " ".join(line)

print "Question:", question
print "Difficulty from answerer:", difficulty_answerer
print "Difficulty from questioner:", difficulty_questioner
#print "Question:", question
#print "Difficulty from answerer:", difficulty_answerer
#print "Difficulty from questioner:", difficulty_questioner

# Open the question file and start answering questions.
article = coref(path_to_article)
print "Our answer:", answer(question, article)
print "Correct answer:", correct_answer

print
# This call runs coref on the hard-coded test file rather than on
# path_to_article, and prints whatever coref returns.
article = coref('corefTest.txt')
print article
#print "Our answer:", answer(question, article)
#print "Correct answer:", correct_answer
Empty file added corefTest.parse
Empty file.
Empty file added corefTest.sst
Empty file.
1 change: 1 addition & 0 deletions corefTest.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Kangaroos are the only large animals to use hopping as a means of locomotion. The comfortable hopping speed for a red kangaroo is about 20–25 km/h (13–16 mph), but speeds of up to 70 km/h (44 mph) can be attained over short distances, while it can sustain a speed of 40 km/h (25 mph) for nearly 2 km (1.2 mi).[25] This fast and energy-efficient method of travel has evolved because of the need to regularly cover large distances in search of food and water, rather than the need to escape predators.[citation needed] To move at slow speeds, it uses its tail to form a tripod with its two forelimbs, then raises its hind feet forward. Kangaroos are adept swimmers, and often flee into waterways if threatened by a predator. If pursued into the water, a kangaroo may use its forepaws to hold the predator underwater so as to drown it.

0 comments on commit 6364d81

Please sign in to comment.