Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions lib/count_words.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
class CountWords

# Run the whole word-count pipeline for one file: read it, lowercase the
# text, split it into words, count each word, sort the counts from highest
# to lowest, and print one line per word.
#
# filename - path of the text file to process (handed straight to readFile).
#
# Returns whatever displayWordCounts returns.
def run(filename)

# read the text file
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i like the pipelining. Also see Mike Gee's comment on sticking some of this stuff into the constructor.

text = readFile(filename)

# lowercase first so counting is case-insensitive, then split into words
cleanedText = stripCharacters(text.downcase)

# count all words
wordCount = countAllWords(cleanedText)

# reverse sort word counts (highest count first)
sortedWords = reverseSortWordCounts(wordCount)

# display each word plus its found value
displayWordCounts(sortedWords)
end

# Read an entire file into memory.
#
# filename - path of the file to read.
#
# Returns the file's contents as a String. Errors raised while opening or
# reading the file (for example when it does not exist) propagate to the
# caller.
#
# TODO: 1. ensure file extension is .txt
#       2. limit file size or refactor app to stream lines
#       3. ensure the file exists
def readFile(filename)
  File.open(filename) { |file| file.read }
end

# Split text into word tokens, discarding everything between them.
#
# A token is a maximal run of word characters (as matched by \w: letters,
# digits and underscore) and apostrophes, so contractions such as "don't"
# stay in one piece. A run made only of apostrophes is also returned as a
# token.
#
# text - String to tokenize.
#
# Returns an Array of token Strings in order of appearance (empty when the
# text contains no matching characters).
def stripCharacters(text)
  word_pattern = /[\w']+/
  text.scan(word_pattern)
end

# Count how many times each word occurs.
#
# cleanedText - collection of words that responds to each (run passes the
#               Array produced by stripCharacters).
#
# Returns a Hash mapping each distinct word to the number of times it was
# seen.
#
# NOTE(review): Enumerable#tally (Ruby 2.7+) looks like a drop-in equivalent
# for this loop - confirm the minimum Ruby version before switching.
def countAllWords(cleanedText)
wordCount = {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

anytime you initialize a collection to then iteratively build it up is a clue to maybe use a method like .map or .reduce which are common in a lot of languages.

here i'd recommend using a method like .reduce or .each_with_object.
lines 37-46 could also be written:

cleaned_text.reduce(Hash.new(0)) { |hash,word| hash[word] += 1; hash }

or

cleaned_text.each_with_object(Hash.new(0)) { |word, hash| hash[word] += 1}

cleanedText.each { |word|
if wordCount[word] # word already seen: increment the number of times it has been found
wordCount[word] = wordCount[word] + 1
else # first time this word is seen: set its found value to 1
wordCount[word] = 1
end
}
wordCount
end

# Order word counts from most frequent to least frequent.
#
# wordCount - Hash of word => count.
#
# Returns an Array of [word, count] pairs, highest count first. The pairs
# are sorted ascending by count and the result is then reversed.
def reverseSortWordCounts(wordCount)
  ascending = wordCount.sort_by { |pair| pair.last }
  ascending.reverse
end

# Print one human-readable line per word to standard output, in the order
# given.
#
# Uses puts rather than p: p writes the inspect form of the string, which
# wraps every line in double quotes and escapes special characters.
#
# sortedWords - Array of [word, count] pairs (as returned by
#               reverseSortWordCounts).
#
# Returns sortedWords unchanged (the result of each).
def displayWordCounts(sortedWords)
  sortedWords.each do |word, count|
    # interpolation also copes with keys that are not Strings
    puts "#{word} is mentioned #{count} times"
  end
end

end

# CountWords.new.run('speech.txt')
36 changes: 36 additions & 0 deletions spec/count_words_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
require 'rspec'
require 'count_words'

# Unit specs for CountWords: each example exercises one pipeline step in
# isolation on a fresh instance.
describe CountWords do
it 'should be a Class' do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

watch your spaces ;)

expect(described_class.is_a? Class).to eq true
end

# Reads "speech.txt" via a relative path, so the file must exist in the
# directory the specs are run from.
it 'should read a file that exists' do
countWords = CountWords.new
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See betterspecs.org documentation on let
http://www.betterspecs.org/

expect(countWords.readFile("speech.txt")).to be_a_kind_of(String)
end

it 'should strip out non alpha characters besides apostrophes' do
# this could be expanded to include non-keyboard characters
countWords = CountWords.new
text = "aaa./,;'[]\!@#$%^&*(){}|:<>?"
# the lone apostrophe comes back as its own token
expect(countWords.stripCharacters(text)).to eq ["aaa", "'"]
end

it 'should count words' do
countWords = CountWords.new
textArray = ["a", "wonderful", "world", "this", "is", "a", "super", "super", "super", "wonderful", "world"]
textHash = { "a" => 2, "wonderful" => 2, "world" => 2, "this" => 1, "is" => 1, "super" => 3 }
expect(countWords.countAllWords(textArray)).to eq textHash
end

it 'should reverse sort a hash' do
countWords = CountWords.new
textHash = { "a" => 1, "wonderful" => 2 }
# expected result is an Array of [word, count] pairs, highest count first
reverseSortedText = [["wonderful", 2], ["a", 1]]
expect(countWords.reverseSortWordCounts(textHash)).to eq reverseSortedText
end

# TODO: figure out how to stub or mock console output and test what
# displayWordCounts actually writes (RSpec's `output(...).to_stdout` matcher
# may be enough - to be confirmed).
end