Skip to content

Commit

Permalink
basic stuff
Browse files (browse the repository at this point in the history)
  • Loading branch information
Daniel Sedra committed Apr 14, 2013
1 parent 7eb8b66 commit ac57bae
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
6 changes: 3 additions & 3 deletions modules/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import nltk

-sample = open("para.txt").read()
-
+#sample = open("para.txt").read()
+sample = 'I like Harvard and partitions'
sentences = nltk.sent_tokenize(sample)
tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
Expand Down Expand Up @@ -43,4 +43,4 @@ def extract_entity_names(t):
for (x,y) in tagged_sentences[0]:
if y in wantset:
entity_names.append(x)
-print entity_names
+#print entity_names
10 changes: 5 additions & 5 deletions modules/sourceContentSelector.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ def ngramWeight(question, sentence):

#get all bigram overlaps, rolls around end of sentence
if len(uniQ) > 1 and len(uniS) > 1:
-bigramQ = {uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)}
-bigramS = {uniS[i-1]+uniS[i] for i,word in enumerate(uniS)}
+bigramQ = set([uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)])
+bigramS = set([uniS[i-1]+uniS[i] for i,word in enumerate(uniS)])
bigram = bigramQ.intersection(bigramS)
else:
bigram = {}

-if len(uniQ) > 2 and len(uniS) > 1:
-trigramQ = {uniQ[i-2]+uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)}
-trigramS = {uniS[i-2]+uniS[i-1]+uniS[i] for i,word in enumerate(uniS)}
+if len(uniQ) > 2 and len(uniS) > 2:
+trigramQ = set([uniQ[i-2]+uniQ[i-1]+uniQ[i] for i,word in enumerate(uniQ)])
+trigramS = set([uniS[i-2]+uniS[i-1]+uniS[i] for i,word in enumerate(uniS)])
trigram = trigramQ.intersection(trigramS)
else:
trigram = {}
Expand Down

0 comments on commit ac57bae

Please sign in to comment.