Skip to content

Commit 6a29fcc

Browse files
test(*): update test cases & model
Co-authored-by: Rachna <[email protected]>
1 parent 3c19b63 commit 6a29fcc

11 files changed

+17
-21
lines changed

test/apiA-specs.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ describe( 'APIs — A', function () {
110110

111111
describe( 'doc API', function () {
112112
it( '.isLexeme() should detect lexemes correctly', function () {
113-
expect( doc1.isLexeme( 'recognise' ) ).to.deep.equal( [ 62816 ] );
113+
expect( doc1.isLexeme( 'recognise' ) ).to.deep.equal( [ 62879 ] );
114114

115115
expect( doc2.isLexeme( 'zxcvbnm' ) ).to.deep.equal( null );
116116
} );

test/cache-specs.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ describe( 'cache', function () {
5050
} );
5151

5252
it( 'currentSize() should current size of the lexicon', function () {
53-
expect( cache.currentSize() ).to.equal( 87510 );
53+
expect( cache.currentSize() ).to.equal( 87592 );
5454
} );
5555

5656
it( 'should compute oov number\'s pos as NUM', function () {

test/its-specs.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ describe( 'its functions for .out()', function () {
115115
it( 'its.uniqueId', function () {
116116
expect( nlp.readDoc( '$%^oov^%$' ).tokens().itemAt( 0 ).out( its.uniqueId ) ).to.equal( 0 );
117117
expect( nlp.readDoc( '\n' ).tokens().itemAt( 0 ).out( its.uniqueId ) ).to.equal( 1 );
118-
expect( nlp.readDoc( 'The' ).tokens().itemAt( 0 ).out( its.uniqueId ) ).to.equal( 77051 );
118+
expect( nlp.readDoc( 'The' ).tokens().itemAt( 0 ).out( its.uniqueId ) ).to.equal( 77123 );
119119
} );
120120

121121
it( 'its.negationFlag', function () {
@@ -213,17 +213,17 @@ describe( 'its functions for .out()', function () {
213213

214214
const rank = [
215215
{ 'importance': 0.9667, 'index': 0 },
216-
{ 'importance': 0.1667, 'index': 1 },
216+
{ 'importance': 0.2333, 'index': 1 },
217217
{ 'importance': 0.1333, 'index': 2 },
218218
{ 'importance': 0.6667, 'index': 3 },
219219
{ 'importance': 0.5, 'index': 4 },
220-
{ 'importance': 0.9333, 'index': 5 },
220+
{ 'importance': 1, 'index': 5 },
221221
{ 'importance': 1, 'index': 6 },
222222
{ 'importance': 0, 'index': 7 },
223223
{ 'importance': 0.3667, 'index': 8 },
224224
{ 'importance': 0.6333, 'index': 9 },
225225
{ 'importance': 0.9667, 'index': 10 },
226-
{ 'importance': 0.4, 'index': 11 },
226+
{ 'importance': 0.4333, 'index': 11 },
227227
{ 'importance': 0.8333, 'index': 12 },
228228
{ 'importance': 0.3667, 'index': 13 },
229229
{ 'importance': 0.1333, 'index': 14 },

test/test-model/feature.js

+5-8
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,6 @@ var feature = function ( config, lang, featuresData, isLexicographer ) {
167167
return pos || 8;
168168
};
169169

170-
// Stores the previous word passed to the `partOfSpeech` function.
171-
let prevWord = null;
172170
var partOfSpeech = function ( word, category, cache ) {
173171
if ( isLexicographer ) {
174172
// Get the array of pos tags.
@@ -185,11 +183,12 @@ var feature = function ( config, lang, featuresData, isLexicographer ) {
185183
wordInLC = word.toLowerCase();
186184
const hash = cache.lookup( wordInLC )[ 0 ];
187185
if ( hash < cache.intrinsicSize() ) {
188-
// Found, extract pos of lower cased word.
186+
// Lowercased word is found in the vocabulary; this also implies that
187+
// the `word` was NOT in lowercase. Therefore, it may be a candidate PROPN.
188+
// Now, extract pos of lower cased word.
189189
const posOfWLC = cache.posOf( hash );
190-
// Force uppercase & titlecase words to PROPN except for the forst token of the sentence.
191-
const isFirstToken = ( prevWord === null || (/^[\t\r\n.?!]+$/).test( prevWord ) );
192-
pos = ( ( rgxTC.test( word ) || rgxUC.test( word ) ) && !isFirstToken ) ? 12 : posOfWLC;
190+
// If the lowercased word is NOUN or ADJ then switch to PROPN.
191+
pos = ( posOfWLC === 8 || posOfWLC === 1 ) ? 12 : posOfWLC;
193192
} else {
194193
pos = oovPoS( word );
195194
// Word but completely missing from lexicon: if it is word-like then
@@ -198,8 +197,6 @@ var feature = function ( config, lang, featuresData, isLexicographer ) {
198197
// ( ( ( /^[a-z]*$/ ).test( word ) ) ? 8 : 12 ) : 12;
199198
}
200199
}
201-
// Update the previous word.
202-
prevWord = word;
203200
// If pos is not found, try obtaining pos from the tcat2pos map; else PROPN for title-cased words, otherwise NOUN.
204201
return pos || tcat2pos[ category ] || ( ( rgxTC.test( word ) ) ? 12 : 8 );
205202
}; // partOfSpeech()

test/test-model/languages/cur/models/eng-core-web-model.json

+1-1
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
["[100,127,{\"0\":{\"5\":111,\"43\":113,\"46\":108,\"49\":109,\"197\":110,\"214\":112,\"2699\":114,\"5231\":53,\"7484\":117,\"10900\":116,\"12549\":81,\"12553\":84,\"25000\":103,\"27138\":118,\"27985\":67,\"27986\":69,\"27987\":71,\"27990\":73,\"32628\":1,\"34487\":105,\"36526\":119,\"38042\":65,\"46237\":92,\"47454\":94,\"50426\":101,\"50691\":96,\"50694\":120,\"51014\":89,\"51297\":97,\"51568\":102,\"51677\":45,\"59576\":104,\"62229\":106,\"65899\":75,\"65900\":79,\"65904\":77,\"66992\":107,\"70947\":121,\"73207\":122,\"77479\":123,\"81702\":124,\"81714\":63,\"82584\":125,\"85516\":126,\"86112\":50,\"86837\":127,\"2070003\":0},\"1\":{\"47\":2,\"51\":26,\"51014\":34,\"87219\":10,\"87220\":18,\"2070003\":0},\"2\":{\"51014\":3,\"2070003\":0},\"3\":{\"47\":4,\"32628\":41,\"87219\":6,\"87220\":8,\"2070003\":0},\"4\":{\"32628\":5,\"2070003\":0},\"6\":{\"32628\":7,\"2070003\":0},\"8\":{\"32628\":9,\"2070003\":0},\"10\":{\"51014\":11,\"2070003\":0},\"11\":{\"47\":12,\"32628\":42,\"87219\":14,\"87220\":16,\"2070003\":0},\"12\":{\"32628\":13,\"2070003\":0},\"14\":{\"32628\":15,\"2070003\":0},\"16\":{\"32628\":17,\"2070003\":0},\"18\":{\"51014\":19,\"2070003\":0},\"19\":{\"47\":20,\"32628\":43,\"87219\":22,\"87220\":24,\"2070003\":0},\"20\":{\"32628\":21,\"2070003\":0},\"22\":{\"32628\":23,\"2070003\":0},\"24\":{\"32628\":25,\"2070003\":0},\"26\":{\"51014\":27,\"2070003\":0},\"27\":{\"47\":28,\"32628\":44,\"87219\":30,\"87220\":32,\"2070003\":0},\"28\":{\"32628\":29,\"2070003\":0},\"30\":{\"32628\":31,\"2070003\":0},\"32\":{\"32628\":33,\"2070003\":0},\"34\":{\"47\":35,\"32628\":62,\"87219\":37,\"87220\":39,\"2070003\":0},\"35\":{\"32628\":36,\"2070003\":0},\"37\":{\"32628\":38,\"2070003\":0},\"39\":{\"32628\":40,\"2070003\":0},\"45\":{\"12924\":48,\"41435\":88,\"52716\":87,\"78040\":46,\"2070003\":98},\"46\":{\"46776\":47,\"2070003\":0},\"48\":{\"37486\":49,\"2070003\":0},\"50\":{\"31190\":51,\"2070003\":100},\"51\":{\"1432\":52,\"2070003\":0},\"53\":{\"32791\":54,\"44355\"
:58,\"49203\":56,\"71336\":60,\"2070003\":115},\"54\":{\"5231\":55,\"2070003\":0},\"56\":{\"5231\":57,\"2070003\":0},\"58\":{\"5231\":59,\"2070003\":0},\"60\":{\"5231\":61,\"2070003\":0},\"63\":{\"78040\":64,\"2070003\":0},\"65\":{\"78040\":66,\"2070003\":0},\"67\":{\"78040\":68,\"2070003\":0},\"69\":{\"78040\":70,\"2070003\":0},\"71\":{\"78040\":72,\"2070003\":0},\"73\":{\"78040\":74,\"2070003\":0},\"75\":{\"53385\":76,\"2070003\":0},\"77\":{\"53385\":78,\"2070003\":0},\"79\":{\"53385\":80,\"2070003\":0},\"81\":{\"30075\":82,\"52424\":83,\"2070003\":0},\"84\":{\"30075\":85,\"52424\":86,\"2070003\":0},\"89\":{\"31190\":91,\"86214\":90,\"2070003\":99},\"92\":{\"51677\":93,\"2070003\":0},\"94\":{\"51677\":95,\"2070003\":0}},{\"5\":\"0\",\"7\":\"0\",\"9\":\"0\",\"13\":\"0\",\"15\":\"0\",\"17\":\"0\",\"21\":\"0\",\"23\":\"0\",\"25\":\"0\",\"29\":\"0\",\"31\":\"0\",\"33\":\"0\",\"36\":\"0\",\"38\":\"0\",\"40\":\"0\",\"41\":\"0\",\"42\":\"0\",\"43\":\"0\",\"44\":\"0\",\"47\":\"0\",\"49\":\"0\",\"52\":\"0\",\"55\":\"0\",\"57\":\"0\",\"59\":\"0\",\"61\":\"0\",\"62\":\"0\",\"64\":\"1\",\"66\":\"1\",\"68\":\"1\",\"70\":\"1\",\"72\":\"1\",\"74\":\"1\",\"76\":\"1\",\"78\":\"1\",\"80\":\"1\",\"82\":\"2\",\"83\":\"2\",\"85\":\"2\",\"86\":\"2\",\"87\":\"0\",\"88\":\"0\",\"90\":\"0\",\"91\":\"0\",\"93\":\"0\",\"95\":\"0\",\"96\":\"1\",\"97\":\"1\",\"98\":\"1\",\"99\":\"1\",\"100\":\"1\",\"101\":\"1\",\"102\":\"1\",\"103\":\"1\",\"104\":\"1\",\"105\":\"1\",\"106\":\"1\",\"107\":\"1\",\"108\":\"2\",\"109\":\"2\",\"110\":\"2\",\"111\":\"2\",\"112\":\"2\",\"113\":\"2\",\"114\":\"2\",\"115\":\"2\",\"116\":\"2\",\"117\":\"2\",\"118\":\"2\",\"119\":\"2\",\"120\":\"2\",\"121\":\"2\",\"122\":\"2\",\"123\":\"2\",\"124\":\"2\",\"125\":\"2\",\"126\":\"2\",\"127\":\"2\"},{},{}]"]
1+
["[100,127,{\"0\":{\"5\":111,\"43\":113,\"46\":108,\"49\":109,\"197\":110,\"214\":112,\"2699\":114,\"5231\":53,\"7486\":117,\"10905\":116,\"12556\":81,\"12560\":84,\"25023\":103,\"27163\":118,\"28010\":67,\"28011\":69,\"28012\":71,\"28015\":73,\"32659\":1,\"34520\":105,\"36562\":119,\"38078\":65,\"46284\":92,\"47503\":94,\"50480\":101,\"50745\":96,\"50748\":120,\"51069\":89,\"51352\":97,\"51623\":102,\"51732\":45,\"59638\":104,\"62292\":106,\"65963\":75,\"65964\":79,\"65968\":77,\"67056\":107,\"71014\":121,\"73274\":122,\"77551\":123,\"81777\":124,\"81789\":63,\"82659\":125,\"85593\":126,\"86191\":50,\"86919\":127,\"2070003\":0},\"1\":{\"47\":2,\"51\":26,\"51069\":34,\"87301\":10,\"87302\":18,\"2070003\":0},\"2\":{\"51069\":3,\"2070003\":0},\"3\":{\"47\":4,\"32659\":41,\"87301\":6,\"87302\":8,\"2070003\":0},\"4\":{\"32659\":5,\"2070003\":0},\"6\":{\"32659\":7,\"2070003\":0},\"8\":{\"32659\":9,\"2070003\":0},\"10\":{\"51069\":11,\"2070003\":0},\"11\":{\"47\":12,\"32659\":42,\"87301\":14,\"87302\":16,\"2070003\":0},\"12\":{\"32659\":13,\"2070003\":0},\"14\":{\"32659\":15,\"2070003\":0},\"16\":{\"32659\":17,\"2070003\":0},\"18\":{\"51069\":19,\"2070003\":0},\"19\":{\"47\":20,\"32659\":43,\"87301\":22,\"87302\":24,\"2070003\":0},\"20\":{\"32659\":21,\"2070003\":0},\"22\":{\"32659\":23,\"2070003\":0},\"24\":{\"32659\":25,\"2070003\":0},\"26\":{\"51069\":27,\"2070003\":0},\"27\":{\"47\":28,\"32659\":44,\"87301\":30,\"87302\":32,\"2070003\":0},\"28\":{\"32659\":29,\"2070003\":0},\"30\":{\"32659\":31,\"2070003\":0},\"32\":{\"32659\":33,\"2070003\":0},\"34\":{\"47\":35,\"32659\":62,\"87301\":37,\"87302\":39,\"2070003\":0},\"35\":{\"32659\":36,\"2070003\":0},\"37\":{\"32659\":38,\"2070003\":0},\"39\":{\"32659\":40,\"2070003\":0},\"45\":{\"12932\":48,\"41471\":88,\"52773\":87,\"78113\":46,\"2070003\":98},\"46\":{\"46824\":47,\"2070003\":0},\"48\":{\"37522\":49,\"2070003\":0},\"50\":{\"31218\":51,\"2070003\":100},\"51\":{\"1432\":52,\"2070003\":0},\"53\":{\"32822\":54,\"44398\"
:58,\"49257\":56,\"71403\":60,\"2070003\":115},\"54\":{\"5231\":55,\"2070003\":0},\"56\":{\"5231\":57,\"2070003\":0},\"58\":{\"5231\":59,\"2070003\":0},\"60\":{\"5231\":61,\"2070003\":0},\"63\":{\"78113\":64,\"2070003\":0},\"65\":{\"78113\":66,\"2070003\":0},\"67\":{\"78113\":68,\"2070003\":0},\"69\":{\"78113\":70,\"2070003\":0},\"71\":{\"78113\":72,\"2070003\":0},\"73\":{\"78113\":74,\"2070003\":0},\"75\":{\"53444\":76,\"2070003\":0},\"77\":{\"53444\":78,\"2070003\":0},\"79\":{\"53444\":80,\"2070003\":0},\"81\":{\"30102\":82,\"52480\":83,\"2070003\":0},\"84\":{\"30102\":85,\"52480\":86,\"2070003\":0},\"89\":{\"31218\":91,\"86294\":90,\"2070003\":99},\"92\":{\"51732\":93,\"2070003\":0},\"94\":{\"51732\":95,\"2070003\":0}},{\"5\":\"0\",\"7\":\"0\",\"9\":\"0\",\"13\":\"0\",\"15\":\"0\",\"17\":\"0\",\"21\":\"0\",\"23\":\"0\",\"25\":\"0\",\"29\":\"0\",\"31\":\"0\",\"33\":\"0\",\"36\":\"0\",\"38\":\"0\",\"40\":\"0\",\"41\":\"0\",\"42\":\"0\",\"43\":\"0\",\"44\":\"0\",\"47\":\"0\",\"49\":\"0\",\"52\":\"0\",\"55\":\"0\",\"57\":\"0\",\"59\":\"0\",\"61\":\"0\",\"62\":\"0\",\"64\":\"1\",\"66\":\"1\",\"68\":\"1\",\"70\":\"1\",\"72\":\"1\",\"74\":\"1\",\"76\":\"1\",\"78\":\"1\",\"80\":\"1\",\"82\":\"2\",\"83\":\"2\",\"85\":\"2\",\"86\":\"2\",\"87\":\"0\",\"88\":\"0\",\"90\":\"0\",\"91\":\"0\",\"93\":\"0\",\"95\":\"0\",\"96\":\"1\",\"97\":\"1\",\"98\":\"1\",\"99\":\"1\",\"100\":\"1\",\"101\":\"1\",\"102\":\"1\",\"103\":\"1\",\"104\":\"1\",\"105\":\"1\",\"106\":\"1\",\"107\":\"1\",\"108\":\"2\",\"109\":\"2\",\"110\":\"2\",\"111\":\"2\",\"112\":\"2\",\"113\":\"2\",\"114\":\"2\",\"115\":\"2\",\"116\":\"2\",\"117\":\"2\",\"118\":\"2\",\"119\":\"2\",\"120\":\"2\",\"121\":\"2\",\"122\":\"2\",\"123\":\"2\",\"124\":\"2\",\"125\":\"2\",\"126\":\"2\",\"127\":\"2\"},{},{}]"]

test/test-model/languages/cur/models/eng-ner-model.json

+1-1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)