Commit df0efad
i don't even
Stephen Bly committed Apr 16, 2013
1 parent 15556fc commit df0efad
Showing 3 changed files with 48 additions and 37 deletions.
77 changes: 41 additions & 36 deletions answer
@@ -29,7 +29,7 @@ def contains_negative(sent):
 # Answers a question from the information in article.
 # Ranks all the sentences and then returns the top choice.
 def answer(question, article):
-  question = question.strip()
+  question = question.strip().rstrip("?").lower()
   question_type = questionClassifier.process(question)
   question = nltk.tokenize.word_tokenize(question)
   relevant = sourceContentSelector.getScoredSentences(question, article)
@@ -42,40 +42,45 @@ def answer(question, article):
 # The main script
 if __name__ == '__main__':
   article_name = sys.argv[1]
+  questions = open(sys.argv[2]).read().split("\n")
 
-  verbose = False
-  if (len(sys.argv) > 2):
-    print("verbose?")
-    if (sys.argv[2] =="--v" or sys.argv[2] == "-v"):
-      verbose = True
+  article = coref.process(article_name)
+  for question in questions:
+    print answer(question, article)
 
-  for year in ("S08", "S09", "S10"):
-    if verbose:
-      print "Year:", year
-    prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
-    question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
-    question_answer_pairs.pop(0)
-    for line in question_answer_pairs:
-      if not line.startswith(article_name): continue
-      line = line.lstrip(article_name)
-      end = line.find("?")
-      if end == -1: continue
-      question = line[:end+1].strip()
-      line = line[end+1:].split()
-      path_to_article = prefix+line.pop()+".txt"
-      difficulty_answerer = line.pop()
-      difficulty_questioner = line.pop()
-      correct_answer = " ".join(line)
-
-      if verbose:
-        print "Question:", question
-        print "Difficulty from answerer:", difficulty_answerer
-        print "Difficulty from questioner:", difficulty_questioner
-
-      article = coref.process(path_to_article)
-
-      if verbose:
-        print "Our answer:", answer(question, article)
-        print "Correct answer:", correct_answer
-      else:
-        print answer(question, article)
+  #verbose = False
+  #if (len(sys.argv) > 2):
+  #  print("verbose?")
+  #  if (sys.argv[2] =="--v" or sys.argv[2] == "-v"):
+  #    verbose = True
+  #
+  #for year in ("S08", "S09", "S10"):
+  #  if verbose:
+  #    print "Year:", year
+  #  prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
+  #  question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
+  #  question_answer_pairs.pop(0)
+  #  for line in question_answer_pairs:
+  #    if not line.startswith(article_name): continue
+  #    line = line.lstrip(article_name)
+  #    end = line.find("?")
+  #    if end == -1: continue
+  #    question = line[:end+1].strip()
+  #    line = line[end+1:].split()
+  #    path_to_article = prefix+line.pop()+".txt"
+  #    difficulty_answerer = line.pop()
+  #    difficulty_questioner = line.pop()
+  #    correct_answer = " ".join(line)
+  #
+  #    if verbose:
+  #      print "Question:", question
+  #      print "Difficulty from answerer:", difficulty_answerer
+  #      print "Difficulty from questioner:", difficulty_questioner
+  #
+  #    article = coref.process(path_to_article)
+  #
+  #    if verbose:
+  #      print "Our answer:", answer(question, article)
+  #      print "Correct answer:", correct_answer
+  #    else:
+  #      print answer(question, article)
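Net effect of this hunk: the batch evaluation loop over Question_Answer_Dataset_v1.1 is commented out, and the script now answers each question read from the file named by sys.argv[2]. A minimal sketch of the normalization the first hunk adds (illustrative only, not repo code; Python 2 to match the codebase; note rstrip("?") drops every trailing question mark, not just one):

  # Sketch of the normalization now applied at the top of answer():
  q = "  Where do kangaroos live, yo??  "
  q = q.strip().rstrip("?").lower()   # trim whitespace, drop trailing "?"s, case-fold
  print q                             # -> "where do kangaroos live, yo"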
3 changes: 2 additions & 1 deletion modules/sourceContentSelector.py
@@ -33,7 +33,8 @@ def getScoredSentences(question, article):
   sentences = nltk.tokenize.sent_tokenize(article)
   for sentence in sentences:
     if sentence.strip() == "": continue
-    s = score(question, nltk.word_tokenize(sentence))
+    tokenized = nltk.word_tokenize(sentence.lower())
+    s = score(question, tokenized)
     scored_sentences.append((sentence, s))
   return scored_sentences

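Lowercasing the sentence before tokenizing mirrors the .lower() now applied to the question in answer, so scoring is case-insensitive on both sides. The score() implementation is not part of this diff; the sketch below uses a hypothetical token-overlap stand-in to show why the case fold matters:

  import nltk

  # Hypothetical stand-in for score(); the real function is not shown in this diff.
  def overlap_score(question_tokens, sentence_tokens):
    # Count distinct tokens the question and sentence share.
    return len(set(question_tokens) & set(sentence_tokens))

  question = nltk.word_tokenize("where do kangaroos live")
  sentence = "Kangaroos live in Australia."
  print overlap_score(question, nltk.word_tokenize(sentence))          # 1: misses "Kangaroos"
  print overlap_score(question, nltk.word_tokenize(sentence.lower()))  # 2: matches "kangaroos"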
5 changes: 5 additions & 0 deletions questions_test.txt
@@ -0,0 +1,5 @@
+WHat is a Kangaroo?
+What is a roo?
+Is a kangaroo a marsupial?
+Is a kangaroo not a marsupial?
+Where do kangaroos live, yo?
