Skip to content

Commit

Permalink
Now just type in the name of the articles, and it will answer all questions for that article from all years.
Browse files Browse the repository at this point in the history
  • Loading branch information
sbly committed Mar 28, 2013
1 parent 741672d commit efebb56
Show file tree
Hide file tree
Showing 13 changed files with 30 additions and 2,046 deletions.
38 changes: 28 additions & 10 deletions answer
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ sys.path.append("modules")

import questionClassifier
import sourceContentSelector
from nltk_contrib.coref.resolve import BaselineCorefResolver
#from nltk_contrib.coref.resolve import BaselineCorefResolver

#entity_names = []
#
Expand All @@ -30,7 +30,7 @@ def contains_negative(sent):
return "no" in sent or "not" in sent or \
"didn't" in sent or "did not" in sent

resolver = BaselineCorefResolver()
#resolver = BaselineCorefResolver()

# picks the sentence that has the most keywords in common with the question
def answer(question, article):
Expand All @@ -51,14 +51,32 @@ def answer(question, article):


if __name__ == '__main__':
path_to_article = sys.argv[1]
path_to_questions = sys.argv[2]
article_name = sys.argv[1]

# Pre-process article content.
article = open(path_to_article).read()
for year in ("S08", "S09", "S10"):
print "Year:", year
prefix = "Question_Answer_Dataset_v1.1/"+year+"/"
question_answer_pairs = open(prefix+"question_answer_pairs.txt").readlines()
question_answer_pairs.pop(0)
for line in question_answer_pairs:
if not line.startswith(article_name): continue
line = line.lstrip(article_name)
end = line.find("?")
if end == -1: continue
question = line[:end+1].strip()
line = line[end+1:].split()
path_to_article = line.pop()
difficulty_answerer = line.pop()
difficulty_questioner = line.pop()
correct_answer = " ".join(line)

print "Question:", question
print "Difficulty from answerer:", difficulty_answerer
print "Difficulty from questioner:", difficulty_questioner

# Open the question file and start answering questions.
for question in open(path_to_questions):
print question
print answer(question, article)
# Open the question file and start answering questions.
article = open(prefix+path_to_article+".txt").read()
print "Our answer:", answer(question, article)
print "Correct answer:", correct_answer

print
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
12 changes: 2 additions & 10 deletions data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,5 @@

Saved from https://www.ark.cs.cmu.edu/NLP/S13/data/

Within each `/set[n]` is a
- `a6.html` containing the html of a wikipedia article, and
- `a6.txt` containing the plaintext version of a wikipedia article.

The titles of the articles are
- David Beckham (Set 1)
- Orion, constellation (Set 2)
- Latin (Set 3)
- Star Wars Episode IV: A New Hope (Set 4)
- Lisp, programming language (Set 5)
Articles from Wikipedia.
Some have corresponding questions.
File renamed without changes.
274 changes: 0 additions & 274 deletions data/set1/a6.html

This file was deleted.

123 changes: 0 additions & 123 deletions data/set2/a6.html

This file was deleted.

158 changes: 0 additions & 158 deletions data/set3/a6.html

This file was deleted.

173 changes: 0 additions & 173 deletions data/set4/a6.html

This file was deleted.

228 changes: 0 additions & 228 deletions data/set5/a6.html

This file was deleted.

1,070 changes: 0 additions & 1,070 deletions output

This file was deleted.

0 comments on commit efebb56

Please sign in to comment.