From 7ab854daf88874a5dd2061b545f0704b9f4f991b Mon Sep 17 00:00:00 2001 From: Jason Morris Date: Wed, 2 Nov 2016 11:06:33 +0200 Subject: [PATCH 1/2] Implemented `toJsonObject` and allowed `fromJson` to accept JSON objects that can be embedded in larger serialized structures --- .gitignore | 1 + lib/naive_bayes.js | 32 ++++++++++++++++++++++---------- readme.md | 8 ++++++-- test/naive_bayes.js | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..096746c --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/node_modules/ \ No newline at end of file diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index a258db1..1e70a99 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -15,16 +15,21 @@ var STATE_KEYS = module.exports.STATE_KEYS = [ * Initializes a NaiveBayes instance from a JSON state representation. * Use this with classifier.toJson(). * - * @param {String} jsonStr state representation obtained by classifier.toJson() + * @param {String} jsonStr state representation obtained by classifier.toJson() or classifier.toJsonObject() * @return {NaiveBayes} Classifier */ module.exports.fromJson = function (jsonStr) { var parsed; - try { - parsed = JSON.parse(jsonStr) - } catch (e) { - throw new Error('Naivebayes.fromJson expects a valid JSON string.') + if(typeof jsonStr === 'string') { + try { + parsed = JSON.parse(jsonStr) + } catch (e) { + throw new Error('Naivebayes.fromJson expects a valid JSON string.') + } + } else if(typeof jsonStr === 'object') { + parsed = jsonStr // if it's an object try use it directly } + // init a new classifier var classifier = new Naivebayes(parsed.options) @@ -255,18 +260,25 @@ Naivebayes.prototype.frequencyTable = function (tokens) { } /** - * Dump the classifier's state as a JSON string. - * @return {String} Representation of the classifier. 
+ * Dump the classifier's state as a simple object, suitable for embedding within a JSON document or another object. + * @return {Object} Representation of the classifier. */ -Naivebayes.prototype.toJson = function () { +Naivebayes.prototype.toJsonObject = function () { var state = {} var self = this STATE_KEYS.forEach(function (k) { state[k] = self[k] }) + + return state +} - var jsonStr = JSON.stringify(state) - +/** + * Dump the classifier's state as a JSON string. + * @return {String} Representation of the classifier. + */ +Naivebayes.prototype.toJson = function () { + var jsonStr = JSON.stringify(this.toJsonObject()) return jsonStr } diff --git a/readme.md b/readme.md index f268a5b..a7a09f4 100644 --- a/readme.md +++ b/readme.md @@ -72,11 +72,15 @@ Returns the `category` it thinks `text` belongs to. Its judgement is based on wh ###`classifier.toJson()` -Returns the JSON representation of a classifier. +Returns the JSON representation of a classifier. This is the same as `JSON.stringify(classifier.toJsonObject())`. + +###`classifier.toJsonObject()` + +Returns a JSON-friendly representation of the classifier as an `object`. ###`var classifier = bayes.fromJson(jsonStr)` -Returns a classifier instance from the JSON representation. Use this with the JSON representation obtained from `classifier.toJson()` +Returns a classifier instance from the JSON representation. 
Use this with the JSON representation obtained from `classifier.toJson()` or `classifier.toJsonObject()` ## License diff --git a/test/naive_bayes.js b/test/naive_bayes.js index 9a501eb..4c57f90 100644 --- a/test/naive_bayes.js +++ b/test/naive_bayes.js @@ -72,6 +72,24 @@ describe('bayes serializing/deserializing its state', function () { assert.deepEqual(revivedClassifier[k], classifier[k]) }) + done() + }) + + it('serializes/deserializes its state as an Object correctly.', function (done) { + var classifier = bayes() + + classifier.learn('Fun times were had by all', 'positive') + classifier.learn('sad dark rainy day in the cave', 'negative') + + var jsonRepr = classifier.toJsonObject() + + var revivedClassifier = bayes.fromJson(jsonRepr) + + // ensure the revived classifier's state is same as original state + bayes.STATE_KEYS.forEach(function (k) { + assert.deepEqual(revivedClassifier[k], classifier[k]) + }) + done() }) }) From aa9e27d7108c194b39085135d571baede6c3f6bb Mon Sep 17 00:00:00 2001 From: Jason Morris Date: Mon, 27 Mar 2017 07:40:58 +0200 Subject: [PATCH 2/2] Added the `probabilities` function to allow more details of each category's probability --- .gitignore | 3 +- lib/naive_bayes.js | 72 +++++++++++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index 096746c..bee8da8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -/node_modules/ \ No newline at end of file +/node_modules/ +/nbproject/private/ \ No newline at end of file diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index 1e70a99..71b3f63 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -172,52 +172,60 @@ Naivebayes.prototype.learn = function (text, category) { } /** - * Determine what category `text` belongs to. - * - * @param {String} text - * @return {String} category + * Returns the probability the given text matches each category, sorted in order from most likely to least likely. 
This + * method allows you to check the differences between the category probabilities. + * + * @param {string} text the text to categorize + * @returns {Array} an array of objects in the form {category, probability} */ -Naivebayes.prototype.categorize = function (text) { +Naivebayes.prototype.probabilities = function (text) { var self = this - , maxProbability = -Infinity - , chosenCategory = null var tokens = self.tokenizer(text) var frequencyTable = self.frequencyTable(tokens) //iterate thru our categories to find the one with max probability for this text - Object - .keys(self.categories) - .forEach(function (category) { + var categoryProbabilities = Object + .keys(self.categories) + .map(function (category) { - //start by calculating the overall probability of this category - //=> out of all documents we've ever looked at, how many were - // mapped to this category - var categoryProbability = self.docCount[category] / self.totalDocuments + //start by calculating the overall probability of this category + //=> out of all documents we've ever looked at, how many were + // mapped to this category + var categoryProbability = self.docCount[category] / self.totalDocuments - //take the log to avoid underflow - var logProbability = Math.log(categoryProbability) + //take the log to avoid underflow + var logProbability = Math.log(categoryProbability) - //now determine P( w | c ) for each word `w` in the text - Object - .keys(frequencyTable) - .forEach(function (token) { - var frequencyInText = frequencyTable[token] - var tokenProbability = self.tokenProbability(token, category) + //now determine P( w | c ) for each word `w` in the text + Object + .keys(frequencyTable) + .forEach(function (token) { + var frequencyInText = frequencyTable[token] + var tokenProbability = self.tokenProbability(token, category) - // console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability) + // console.log('token: %s category: `%s` tokenProbability: %d', 
token, category, tokenProbability) - //determine the log of the P( w | c ) for this word - logProbability += frequencyInText * Math.log(tokenProbability) - }) + //determine the log of the P( w | c ) for this word + logProbability += frequencyInText * Math.log(tokenProbability) + }) - if (logProbability > maxProbability) { - maxProbability = logProbability - chosenCategory = category - } - }) + return {category: category, probability: logProbability} + }); + + categoryProbabilities.sort(function(c1, c2) {return c2.probability - c1.probability}) - return chosenCategory + return categoryProbabilities +} + +/** + * Determine what category `text` belongs to. + * + * @param {String} text + * @return {String} category + */ +Naivebayes.prototype.categorize = function(text) { + return this.probabilities(text)[0].category } /**