From df0efad98d6f43bbb27cdf30bf70f0fbe15bcd31 Mon Sep 17 00:00:00 2001 From: Stephen Bly Date: Mon, 15 Apr 2013 23:45:21 -0400 Subject: [PATCH] i don't even --- answer | 77 +++++++++++++++++--------------- modules/sourceContentSelector.py | 3 +- questions_test.txt | 5 +++ 3 files changed, 48 insertions(+), 37 deletions(-) create mode 100644 questions_test.txt diff --git a/answer b/answer index 6b2eaab..f44b291 100755 --- a/answer +++ b/answer @@ -29,7 +29,7 @@ def contains_negative(sent): # Answers a question from the information in article. # Ranks all the sentences and then returns the top choice. def answer(question, article): - question = question.strip() + question = question.strip().rstrip("?").lower() question_type = questionClassifier.process(question) question = nltk.tokenize.word_tokenize(question) relevant = sourceContentSelector.getScoredSentences(question, article) @@ -42,40 +42,45 @@ def answer(question, article): # The main script if __name__ == '__main__': article_name = sys.argv[1] + questions = open(sys.argv[2]).read().split("\n") - verbose = False - if (len(sys.argv) > 2): - print("verbose?") - if (sys.argv[2] =="--v" or sys.argv[2] == "-v"): - verbose = True + article = coref.process(article_name) + for question in questions: + print answer(question, article) - for year in ("S08", "S09", "S10"): - if verbose: - print "Year:", year - prefix = "Question_Answer_Dataset_v1.1/"+year+"/" - question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines() - question_answer_pairs.pop(0) - for line in question_answer_pairs: - if not line.startswith(article_name): continue - line = line.lstrip(article_name) - end = line.find("?") - if end == -1: continue - question = line[:end+1].strip() - line = line[end+1:].split() - path_to_article = prefix+line.pop()+".txt" - difficulty_answerer = line.pop() - difficulty_questioner = line.pop() - correct_answer = " ".join(line) - - if verbose: - print "Question:", question - print "Difficulty from answerer:", difficulty_answerer - print "Difficulty from questioner:", difficulty_questioner - - article = coref.process(path_to_article) - - if verbose: - print "Our answer:", answer(question, article) - print "Correct answer:", correct_answer - else: - print answer(question, article) + #verbose = False + #if (len(sys.argv) > 2): + # print("verbose?") + # if (sys.argv[2] =="--v" or sys.argv[2] == "-v"): + # verbose = True + # + #for year in ("S08", "S09", "S10"): + # if verbose: + # print "Year:", year + # prefix = "Question_Answer_Dataset_v1.1/"+year+"/" + # question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines() + # question_answer_pairs.pop(0) + # for line in question_answer_pairs: + # if not line.startswith(article_name): continue + # line = line.lstrip(article_name) + # end = line.find("?") + # if end == -1: continue + # question = line[:end+1].strip() + # line = line[end+1:].split() + # path_to_article = prefix+line.pop()+".txt" + # difficulty_answerer = line.pop() + # difficulty_questioner = line.pop() + # correct_answer = " ".join(line) + # + # if verbose: + # print "Question:", question + # print "Difficulty from answerer:", difficulty_answerer + # print "Difficulty from questioner:", difficulty_questioner + # + # article = coref.process(path_to_article) + # + # if verbose: + # print "Our answer:", answer(question, article) + # print "Correct answer:", correct_answer + # else: + # print answer(question, article) diff --git a/modules/sourceContentSelector.py b/modules/sourceContentSelector.py index 0e0258a..b7c24ee 100644 --- a/modules/sourceContentSelector.py +++ b/modules/sourceContentSelector.py @@ -33,7 +33,8 @@ def getScoredSentences(question, article): sentences = nltk.tokenize.sent_tokenize(article) for sentence in sentences: if sentence.strip() == "": continue - s = score(question, nltk.word_tokenize(sentence)) + tokenized = nltk.word_tokenize(sentence.lower()) + s = score(question, tokenized) scored_sentences.append((sentence, s)) return scored_sentences diff --git a/questions_test.txt b/questions_test.txt new file mode 100644 index 0000000..e4f50c7 --- /dev/null +++ b/questions_test.txt @@ -0,0 +1,5 @@ +WHat is a Kangaroo? +What is a roo? +Is a kangaroo a marsupial? +Is a kangaroo not a marsupial? +Where do kangaroos live, yo? \ No newline at end of file