From df0efad98d6f43bbb27cdf30bf70f0fbe15bcd31 Mon Sep 17 00:00:00 2001
From: Stephen Bly <stephenbly@cmu-787440.wv.cc.cmu.edu>
Date: Mon, 15 Apr 2013 23:45:21 -0400
Subject: [PATCH] Answer questions from a file; lowercase text before scoring

---
 answer                           | 77 +++++++++++++++++---------------
 modules/sourceContentSelector.py |  3 +-
 questions_test.txt               |  5 +++
 3 files changed, 48 insertions(+), 37 deletions(-)
 create mode 100644 questions_test.txt

diff --git a/answer b/answer
index 6b2eaab..f44b291 100755
--- a/answer
+++ b/answer
@@ -29,7 +29,7 @@ def contains_negative(sent):
 # Answers a question from the information in article.
 # Ranks all the sentences and then returns the top choice.
 def answer(question, article):
-    question = question.strip()
+    question = question.strip().rstrip("?").lower()
     question_type = questionClassifier.process(question)
     question = nltk.tokenize.word_tokenize(question)
     relevant = sourceContentSelector.getScoredSentences(question, article)
@@ -42,40 +42,45 @@ def answer(question, article):
 # The main script
 if __name__ == '__main__':
   article_name = sys.argv[1]
+  questions = [q for q in open(sys.argv[2]).read().splitlines() if q.strip()]  # skip blank lines
 
-  verbose = False
-  if (len(sys.argv) > 2):
-    print("verbose?")
-    if (sys.argv[2] =="--v" or sys.argv[2] == "-v"):
-      verbose = True
+  article = coref.process(article_name)
+  for question in questions:
+    print answer(question, article)
 
-  for year in ("S08", "S09", "S10"):
-    if verbose:
-      print "Year:", year
-    prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
-    question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
-    question_answer_pairs.pop(0)
-    for line in question_answer_pairs:
-      if not line.startswith(article_name): continue
-      line = line.lstrip(article_name)
-      end = line.find("?")
-      if end == -1: continue
-      question = line[:end+1].strip()
-      line = line[end+1:].split()
-      path_to_article = prefix+line.pop()+".txt"
-      difficulty_answerer = line.pop()
-      difficulty_questioner = line.pop()
-      correct_answer = " ".join(line)
-
-      if verbose:
-        print "Question:", question
-        print "Difficulty from answerer:", difficulty_answerer
-        print "Difficulty from questioner:", difficulty_questioner
-
-      article = coref.process(path_to_article)
-
-      if verbose:
-        print "Our answer:", answer(question, article)
-        print "Correct answer:", correct_answer
-      else:
-        print answer(question, article)
+  #verbose = False
+  #if (len(sys.argv) > 2):
+  #  print("verbose?")
+  #  if (sys.argv[2] =="--v" or sys.argv[2] == "-v"):
+  #    verbose = True
+  #
+  #for year in ("S08", "S09", "S10"):
+  #  if verbose:
+  #    print "Year:", year
+  #  prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
+  #  question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
+  #  question_answer_pairs.pop(0)
+  #  for line in question_answer_pairs:
+  #    if not line.startswith(article_name): continue
+  #    line = line.lstrip(article_name)
+  #    end = line.find("?")
+  #    if end == -1: continue
+  #    question = line[:end+1].strip()
+  #    line = line[end+1:].split()
+  #    path_to_article = prefix+line.pop()+".txt"
+  #    difficulty_answerer = line.pop()
+  #    difficulty_questioner = line.pop()
+  #    correct_answer = " ".join(line)
+  #
+  #    if verbose:
+  #      print "Question:", question
+  #      print "Difficulty from answerer:", difficulty_answerer
+  #      print "Difficulty from questioner:", difficulty_questioner
+  #
+  #    article = coref.process(path_to_article)
+  #
+  #    if verbose:
+  #      print "Our answer:", answer(question, article)
+  #      print "Correct answer:", correct_answer
+  #    else:
+  #      print answer(question, article)
diff --git a/modules/sourceContentSelector.py b/modules/sourceContentSelector.py
index 0e0258a..b7c24ee 100644
--- a/modules/sourceContentSelector.py
+++ b/modules/sourceContentSelector.py
@@ -33,7 +33,8 @@ def getScoredSentences(question, article):
   sentences = nltk.tokenize.sent_tokenize(article)
   for sentence in sentences:
       if sentence.strip() == "": continue
-      s = score(question, nltk.word_tokenize(sentence))
+      tokenized = nltk.word_tokenize(sentence.lower())
+      s = score(question, tokenized)
       scored_sentences.append((sentence, s))
   return scored_sentences
 
diff --git a/questions_test.txt b/questions_test.txt
new file mode 100644
index 0000000..e4f50c7
--- /dev/null
+++ b/questions_test.txt
@@ -0,0 +1,5 @@
+WHat is a Kangaroo?
+What is a roo?
+Is a kangaroo a marsupial?
+Is a kangaroo not a marsupial?
+Where do kangaroos live, yo?
\ No newline at end of file