Skip to content

Commit

Permalink
./ask improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
ryhan committed Mar 23, 2013
1 parent 5acbb0c commit 9bff7dd
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
2 changes: 1 addition & 1 deletion ask
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if __name__ == '__main__':
# Decide how many candidates we want to generate
# I'm thinking we should always generate as many questions as possible
# and then just pick the n best.
num_cand = num_questions*10
num_cand = num_questions*20

# Fetch sentence candidates that can be converted into questions.
selected_content = questionContentSelector.process(article_content, num_cand)
Expand Down
17 changes: 14 additions & 3 deletions modules/questionContentSelector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,22 @@
import re
import nltk

# Use part-of-speech tagging and entity chunking to
# Use part-of-speech tagging to
# score the usefulness of a sentence.
def entity_score(sentence):
    """Score how promising *sentence* is as raw material for a question.

    The sentence is tokenized with ``nltk.word_tokenize``.  A sentence only
    earns a non-zero score when it has between 3 and 11 tokens (inclusive)
    and contains a form of "to be" (is / was / were / being / are), matched
    case-insensitively.

    Returns:
        1.0 if such a sentence starts with a token tagged ``PRP`` by
        ``nltk.pos_tag``, 0.5 if it starts with anything else, and 0 when
        the length or "to be" requirement is not met.
    """
    being_forms = ("IS", "WAS", "WERE", "BEING", "ARE")

    tokens = nltk.word_tokenize(sentence)
    # Call upper() -- storing the bound method (x.upper) made every
    # membership test below fail silently.
    upper_tokens = [token.upper() for token in tokens]

    if 2 < len(tokens) < 12 and any(form in upper_tokens for form in being_forms):
        # pos_tag returns a list of (word, tag) pairs; compare the tag, not
        # the whole pair, against "PRP".
        first_tag = nltk.pos_tag([tokens[0]])[0][1]
        if first_tag == "PRP":
            return 1.0
        return 0.5

    # TODO: fold named-entity chunking (nltk.chunk.ne_chunk) into the score.
    return 0
Expand Down
26 changes: 24 additions & 2 deletions modules/questionFromSentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def add_questionmark(sentence):
# GIVEN string representing a declarative sentence,
# RETURNS string representing a question.
def transform(sentence):

sentence = add_questionmark(sentence) # '.' -> '?'

(question, success) = transform_IT_IS(sentence)
Expand All @@ -34,9 +35,29 @@ def transform(sentence):
posTag = nltk.pos_tag([tokens[0]])[0]

#if (tokens[1].upper() in BEING and posTag == 'PRP'):
if (tokens[1].upper() in BEING):
if (len(tokens) > 1 and tokens[1].upper() in BEING):
tokens = [tokens[1].capitalize(), tokens[0].lower()] + tokens[2:]
return (" ".join(tokens), True)

question = " ".join(tokens)
if ("," in question):
question = question.split(",")[0] + "?"
return (question, True)

if (len(tokens) > 2 and tokens[2].upper() in BEING):
tokens = [tokens[2].capitalize(), tokens[0].lower(), tokens[1].lower()] + tokens[3:]
#return (" ".join(tokens), True)
question = " ".join(tokens)
if ("," in question):
question = question.split(",")[0] + "?"
return (question, True)

if (tokens[0].upper() == "IT"):
tokens = ["What"] + tokens[1:]
#return (" ",join(tokens), True)
question = " ".join(tokens)
if ("," in question):
question = question.split(",")[0] + "?"
return (question, True)

"""
tagged = nltk.pos_tag(tokens)
Expand All @@ -48,6 +69,7 @@ def transform(sentence):
tokens = [word1.capitalize(), word0.lower()] + tokens[2:]
return (" ".join(tokens), True)
"""
#print("FAIL: " + sentence)

return (sentence, False)

Expand Down

0 comments on commit 9bff7dd

Please sign in to comment.