From ac57baecfc40cbcd577da815778cc67b103bec57 Mon Sep 17 00:00:00 2001 From: Daniel Sedra Date: Sun, 14 Apr 2013 14:55:23 -0400 Subject: [PATCH] basic stuff --- modules/extractor.py | 6 +++--- modules/sourceContentSelector.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/extractor.py b/modules/extractor.py index 36d81ec..de77a2b 100644 --- a/modules/extractor.py +++ b/modules/extractor.py @@ -3,8 +3,8 @@ import nltk -sample = open("para.txt").read() - +#sample = open("para.txt").read() +sample = 'I like Harvard and partitions' sentences = nltk.sent_tokenize(sample) tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences] tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences] @@ -43,4 +43,4 @@ def extract_entity_names(t): for (x,y) in tagged_sentences[0]: if y in wantset: entity_names.append(x) -print entity_names \ No newline at end of file +#print entity_names \ No newline at end of file diff --git a/modules/sourceContentSelector.py b/modules/sourceContentSelector.py index e9d54dc..3826e8e 100644 --- a/modules/sourceContentSelector.py +++ b/modules/sourceContentSelector.py @@ -73,15 +73,15 @@ def ngramWeight(question, sentence): #get all bigram overlaps, rolls around end of sentence if len(uniQ) > 1 and len(uniS) > 1: - bigramQ = {uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)} - bigramS = {uniS[i-1]+uniS[i] for i,word in enumerate(uniS)} + bigramQ = set([uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)]) + bigramS = set([uniS[i-1]+uniS[i] for i,word in enumerate(uniS)]) bigram = bigramQ.intersection(bigramS) else: bigram = {} - if len(uniQ) > 2 and len(uniS) > 1: - trigramQ = {uniQ[i-2]+uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)} - trigramS = {uniS[i-2]+uniS[i-1]+uniS[i] for i,word in enumerate(uniS)} + if len(uniQ) > 2 and len(uniS) > 2: + trigramQ = set([uniQ[i-2]+uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)]) + trigramS = set([uniS[i-2]+uniS[i-1]+uniS[i] for i,word in enumerate(uniS)]) trigram = trigramQ.intersection(trigramS) else: trigram = {}