./ask improvements

ryhan · Mar 23, 2013 · 9bff7dd · 9bff7dd
1 parent 5acbb0c
commit 9bff7dd
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 6 deletions.
diff --git a/ask b/ask
@@ -27,7 +27,7 @@ if __name__ == '__main__':
   # Decide how many candidates we want to generate
   # im thinking we should always generate as many questions as possible
   # and just pick the n best
-  num_cand = num_questions*10
+  num_cand = num_questions*20
 
   # Fetch sentence candidates that can be converted into questions.
   selected_content = questionContentSelector.process(article_content, num_cand)

diff --git a/modules/questionContentSelector.py b/modules/questionContentSelector.py
@@ -8,11 +8,22 @@
 import re
 import nltk
 
-# Use part-of-speech tagging and entity chunking to 
+# Use part-of-speech tagging to 
 # score the usefulness of a sentence.
 def entity_score(sentence):
-  # tokens = nltk.word_tokenize(sentence)
-  # tagged = nltk.pos_tag(tokens)
+  tokens = nltk.word_tokenize(sentence)
+  tokensU = [x.upper() for x in tokens]
+  if (2 < len(tokens) and len(tokens) < 12):
+    if ("IS" in tokensU or "WAS" in tokensU or
+        "WERE" in tokensU or "BEING" in tokensU or
+        "ARE" in tokensU):
+      # pos_tag returns (word, tag) pairs; compare the tag, not the pair.
+      if (nltk.pos_tag([tokens[0]])[0][1] == "PRP"):
+        return 1.0
+      else:
+        return 0.5
+  # Sentences outside 3-11 tokens or without a form of "to be" score 0.
+  #tagged = nltk.pos_tag(tokens)
   # entities = nltk.chunk.ne_chunk(tagged)
   score = 0
   return score

diff --git a/modules/questionFromSentence.py b/modules/questionFromSentence.py
@@ -23,6 +23,7 @@ def add_questionmark(sentence):
 # GIVEN string representing a declarative sentence,
 # RETURNS string representing a question.
 def transform(sentence):
+
   sentence = add_questionmark(sentence)   # '.' -> '?'
 
   (question, success) = transform_IT_IS(sentence)
@@ -34,9 +35,29 @@ def transform(sentence):
   posTag = nltk.pos_tag([tokens[0]])[0]
 
   #if (tokens[1].upper() in BEING and posTag == 'PRP'):
-  if (tokens[1].upper() in BEING):
+  if (len(tokens) > 1 and tokens[1].upper() in BEING):
     tokens = [tokens[1].capitalize(), tokens[0].lower()] + tokens[2:]
-    return (" ".join(tokens), True)
+
+    question = " ".join(tokens)
+    if ("," in question):
+      question = question.split(",")[0] + "?"
+    return (question, True)
+
+  if (len(tokens) > 2 and tokens[2].upper() in BEING):
+    tokens = [tokens[2].capitalize(), tokens[0].lower(), tokens[1].lower()] + tokens[3:]
+    #return (" ".join(tokens), True)
+    question = " ".join(tokens)
+    if ("," in question):
+      question = question.split(",")[0] + "?"
+    return (question, True)
+
+  if (tokens[0].upper() == "IT"):
+    tokens = ["What"] + tokens[1:]
+    #return (" ",join(tokens), True)
+    question = " ".join(tokens)
+    if ("," in question):
+      question = question.split(",")[0] + "?"
+    return (question, True)
 
   """
   tagged = nltk.pos_tag(tokens)
@@ -48,6 +69,7 @@ def transform(sentence):
     tokens = [word1.capitalize(), word0.lower()] + tokens[2:]
     return (" ".join(tokens), True)
   """
+  #print("FAIL: " + sentence)
 
   return (sentence, False)