# frozen_string_literal: true

# Reconstructed from a collapsed patch that added two files:
# lib/count_words.rb and spec/count_words_spec.rb.

# --- lib/count_words.rb ---

# Counts how often each word occurs in a text file and prints the counts,
# most frequent word first.
#
# Example:
#   CountWords.new.run('speech.txt')
#
# The camelCase method names are kept as-is so existing callers keep working.
class CountWords
  # A "word" is a run of `\w` characters and/or apostrophes. Note that a lone
  # apostrophe therefore counts as a word of its own.
  WORD_PATTERN = /[\w']+/.freeze

  # Reads +filename+, counts its words case-insensitively and prints one line
  # per distinct word.
  #
  # @param filename [String] path of the text file to analyse
  # @return [Array<Array>] the sorted [word, count] pairs that were printed
  def run(filename)
    words = stripCharacters(readFile(filename).downcase)
    displayWordCounts(reverseSortWordCounts(countAllWords(words)))
  end

  # Returns the whole contents of +filename+. Errors raised by File.read
  # (for example for a missing file) propagate to the caller.
  #
  # TODO: 1. ensure the file extension is .txt
  #       2. limit the file size or refactor the app to stream lines
  #       3. ensure the file exists
  #
  # @param filename [String] path of the file to read
  # @return [String] the file contents
  def readFile(filename)
    File.read(filename)
  end

  # Splits +text+ into words, dropping every character that is neither a
  # `\w` character nor an apostrophe.
  #
  # @param text [String] the text to tokenize
  # @return [Array<String>] the words in their original order
  def stripCharacters(text)
    text.scan(WORD_PATTERN)
  end

  # Counts the occurrences of every word.
  #
  # @param cleanedText [Array<String>] words, as returned by #stripCharacters
  # @return [Hash{String => Integer}] word => number of occurrences
  def countAllWords(cleanedText)
    # A plain Hash plus fetch (instead of Hash.new(0)) keeps the returned hash
    # free of a default value, so looking up an unseen word still gives nil.
    cleanedText.each_with_object({}) do |word, counts|
      counts[word] = counts.fetch(word, 0) + 1
    end
  end

  # Orders the counts from most to least frequent.
  #
  # @param wordCount [Hash{String => Integer}] word => number of occurrences
  # @return [Array<Array>] [word, count] pairs, highest count first
  def reverseSortWordCounts(wordCount)
    # Sorting on the negated count avoids a separate reverse; the word itself
    # is the tie-breaker so that equal counts always come out in the same
    # (alphabetical) order.
    wordCount.sort_by { |word, count| [-count, word] }
  end

  # Prints one "<word> is mentioned <count> times" line per pair.
  #
  # @param sortedWords [Array<Array>] [word, count] pairs in display order
  # @return [Array<Array>] +sortedWords+, unchanged
  def displayWordCounts(sortedWords)
    sortedWords.each do |word, count|
      # puts rather than p: p prints the inspect form, which would wrap every
      # line in double quotes.
      puts "#{word} is mentioned #{count} times"
    end
  end
end

# --- spec/count_words_spec.rb ---

# The examples are registered only when RSpec has already been loaded, so that
# loading this file with plain `ruby` defines the class without needing the
# test framework.
if defined?(RSpec)
  RSpec.describe CountWords do
    subject(:counter) { described_class.new }

    it 'is a Class' do
      expect(described_class).to be_a(Class)
    end

    it 'reads a file that exists' do
      # NOTE(review): relies on a speech.txt in the working directory -
      # confirm that the fixture is available where the specs run.
      expect(counter.readFile('speech.txt')).to be_a_kind_of(String)
    end

    it 'strips out non alpha characters besides apostrophes' do
      # This could be expanded upon to include non keyboard characters.
      # Single-quoted so that no "#$..." sequence can be read as interpolation.
      text = 'aaa./,;\'[]!@#$%^&*(){}|:<>?'
      expect(counter.stripCharacters(text)).to eq ['aaa', "'"]
    end

    it 'counts words' do
      words = %w[a wonderful world this is a super super super wonderful world]
      expected = {
        'a' => 2, 'wonderful' => 2, 'world' => 2,
        'this' => 1, 'is' => 1, 'super' => 3
      }
      expect(counter.countAllWords(words)).to eq expected
    end

    it 'reverse sorts a hash' do
      counts = { 'a' => 1, 'wonderful' => 2 }
      expect(counter.reverseSortWordCounts(counts)).to eq [['wonderful', 2], ['a', 1]]
    end

    it 'orders words with equal counts alphabetically' do
      counts = { 'b' => 1, 'a' => 1, 'c' => 2 }
      expect(counter.reverseSortWordCounts(counts)).to eq [['c', 2], ['a', 1], ['b', 1]]
    end

    it 'prints one line per word' do
      expect { counter.displayWordCounts([['wonderful', 2], ['a', 1]]) }
        .to output("wonderful is mentioned 2 times\na is mentioned 1 times\n").to_stdout
    end
  end
end