Skip to content

Commit

Permalink
Beautiful Soup
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen Bly committed Apr 12, 2013
1 parent 7dc4e46 commit bc71d26
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions answer
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import re
import itertools
import nltk
from nltk.stem import PorterStemmer
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
# Import our modules from /modules
sys.path.append("modules")
import questionClassifier
Expand All @@ -26,13 +26,17 @@ def contains_negative(sent):
return "no" in sent or "not" in sent or "n't" in sent

# the set of pronouns, used for anaphora resolution
# A single definition: an earlier, shorter assignment of this name was
# immediately overwritten by this one and has been dropped.
# NOTE(review): "I" is the only capitalised entry -- confirm whether tokens
# are lower-cased before they are looked up in this set.
pronouns = set([
    "he", "she", "it", "him", "her", "his", "they", "their", "we",
    "our", "I", "you", "your", "my", "mine", "yours", "ours",
])

# Runs coreference resolution on the article using arkref.
# TODO: the tagged output is parsed but not yet used; the original article text is returned unchanged.
def coref(path_to_article):
    """Run arkref on the article and return the article's original text.

    Invokes ``./arkref.sh -input <path_to_article>``, reads the matching
    ``tagged`` file (the article path with its trailing "txt" swapped for
    "tagged"), wraps the contents in a single ``<root>`` element, parses
    them with BeautifulSoup and prints the prettified markup.

    TODO: the parsed markup is only printed for inspection; nothing is
    substituted back into the text yet, so the value returned is the
    unmodified contents of ``path_to_article``.
    """
    subprocess.call(["./arkref.sh", "-input", path_to_article])
    # Swap only the last "txt" (the extension) for "tagged"; a plain
    # str.replace would also rewrite a "txt" inside a directory or file name.
    tagged_path = "tagged".join(path_to_article.rsplit("txt", 1))
    # Context managers close the files promptly instead of leaking handles.
    with open(tagged_path) as tagged_file:
        tagged_article = tagged_file.read()
    # NOTE(review): wrapped in <root> presumably because the tagged output has
    # no single top-level element for the XML parser -- confirm.
    soup = BeautifulSoup("<root>" + tagged_article + "</root>", "xml")
    # Parenthesised form prints the same single value on Python 2 and 3.
    print(soup.prettify())
    with open(path_to_article) as article_file:
        return article_file.read()

# Answers a question from the information in article.
Expand All @@ -49,7 +53,6 @@ def answer(question, article):
else:
return top


# The main script
if __name__ == '__main__':
article_name = sys.argv[1]
Expand Down

0 comments on commit bc71d26

Please sign in to comment.