From bc71d26ba1f05907e18407832af43d13e4261ec4 Mon Sep 17 00:00:00 2001 From: Stephen Bly Date: Fri, 12 Apr 2013 15:59:09 -0400 Subject: [PATCH] Beautiful Soup --- answer | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/answer b/answer index a86b276..18bc6f6 100755 --- a/answer +++ b/answer @@ -13,7 +13,7 @@ import re import itertools import nltk from nltk.stem import PorterStemmer -import xml.etree.ElementTree as ET +from bs4 import BeautifulSoup # Import our modules from /modules sys.path.append("modules") import questionClassifier @@ -26,13 +26,17 @@ def contains_negative(sent): return "no" in sent or "not" in sent or "n't" in sent # the set of pronouns, used for anaphora resolution -pronouns = set(["he", "she", "it", "him", "her", "his"]) +pronouns = set(["he", "she", "it", "him", "her", "his","they","their","we", + "our","I","you","your","my","mine","yours","ours"]) # Runs coreference resolution on the article using arkref. # This still needs to be implemented. def coref(path_to_article): subprocess.call(["./arkref.sh", "-input", path_to_article]) - #path_to_article = path_to_article.replace("txt", "tagged") + tagged_article = open(path_to_article.replace("txt", "tagged")).read() + tagged_article = ""+tagged_article+"" + soup = BeautifulSoup(tagged_article, "xml") + print soup.prettify() return open(path_to_article).read() # Answers a question from the information in article. @@ -49,7 +53,6 @@ def answer(question, article): else: return top - # The main script if __name__ == '__main__': article_name = sys.argv[1]