Skip to content

Commit 2911864

Browse files
fix(*): add missing typescript declarations for word embeddings
references #136 references winkjs/wink-embeddings-sg-100d#1 Co-authored-by: Rachna <[email protected]>
1 parent aff4bb1 commit 2911864

File tree

4 files changed

+81
-56
lines changed

4 files changed

+81
-56
lines changed

src/api/col-get-item.js

+3-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
var colGetItemAt = function ( k, start, end, itemFn ) {
4545
// To handle relative indexing, compute actual `k` by adding `start`.
4646
var ak = k + start;
47-
return ( ( ak < start || ak > end ) ? undefined : itemFn( ak ) );
47+
if ( ak < start || ak > end ) {
48+
throw Error( `wink-nlp: ${k} is an invalid or out of bounds index.`);
49+
} else return itemFn( ak );
4850
}; // colGetItemAt()
4951

5052
module.exports = colGetItemAt;

src/api/sel-get-item.js

+3-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@
4141
* @private
4242
*/
4343
var selGetItemAt = function ( k, selection, itemFn ) {
44-
return ( ( k < 0 || k >= selection.length ) ? undefined : itemFn( selection[ k ] ) );
44+
if ( k < 0 || k >= selection.length ) {
45+
throw Error( `wink-nlp: wink-nlp: ${k} is an invalid or out of bounds index.`);
46+
} else return itemFn( selection[ k ] );
4547
}; // selGetItemAt()
4648

4749
module.exports = selGetItemAt;

test/apiA-specs.js

+12-12
Original file line numberDiff line numberDiff line change
@@ -124,20 +124,20 @@ describe( 'APIs — A', function () {
124124
// ItemAt boundary tests.
125125
// Will need a revamp once SBD is in place (TODO):
126126
describe( 'doc API out of range access test', function () {
127-
it( '.sentences() should return undefined for out of range index', function () {
128-
expect( doc1.sentences().itemAt( -1 ) ).to.equal( undefined );
129-
expect( doc1.sentences().itemAt( doc1.sentences().length() ) ).to.equal( undefined );
127+
it( '.sentences() should throw out of range index error', function () {
128+
expect( () => doc1.sentences().itemAt(-1) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ );
129+
expect( () => doc1.sentences().itemAt( doc1.sentences().length() ) ).to.throw( /^wink-nlp: 3 is an invalid or out of bounds index./ );
130130
} );
131131

132-
it( '.tokens().itemAt() should return undefined for out of range index', function () {
133-
expect( doc1.tokens().itemAt( -1 ) ).to.equal( undefined );
134-
expect( doc1.tokens().itemAt( doc1.tokens().length() ) ).to.equal( undefined );
132+
it( '.tokens().itemAt() should throw out of range index error', function () {
133+
expect( () => doc1.tokens().itemAt( -1 ) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ );
134+
expect( () => doc1.tokens().itemAt( doc1.tokens().length() ) ).to.throw( /^wink-nlp: 37 is an invalid or out of bounds index./ );
135135
} );
136136

137-
it( '.tokens().filter().itemAt() should return undefined for out of range index', function () {
137+
it( '.tokens().filter().itemAt() should throw out of range index error', function () {
138138
const ftk1 = doc1.tokens().filter( ( t ) => ( t.out( its.type ) === 'word' ) );
139-
expect( ftk1.itemAt( -1 ) ).to.equal( undefined );
140-
expect( ftk1.itemAt( ftk1.length() ) ).to.equal( undefined );
139+
expect( () => ftk1.itemAt( -1 ) ).to.throw( /^wink-nlp: wink-nlp: -1 is an invalid or out of bounds index./ );
140+
expect( () => ftk1.itemAt( ftk1.length() ) ).to.throw( /^wink-nlp: wink-nlp: 14 is an invalid or out of bounds index./ );
141141
} );
142142
} );
143143

@@ -232,7 +232,7 @@ describe( 'APIs — A', function () {
232232
const i22 = doc2.entities().itemAt( 5 );
233233
expect( i22.out( its.detail ) ).to.deep.equal( ae2[ 5 ] );
234234

235-
expect( doc2.entities().itemAt( 12 ) ).to.deep.equal( undefined );
235+
expect( () => doc2.entities().itemAt( 12 ) ).to.throw( /^wink-nlp: 12 is an invalid or out of bounds index./ );
236236
} );
237237

238238
it( '.filter() should return correctly filter entities', function () {
@@ -247,7 +247,7 @@ describe( 'APIs — A', function () {
247247
// Also check the parent document!
248248
expect( fe1.itemAt( 1 ).parentDocument() ).to.deep.equal( doc1 );
249249
// Out of range item test
250-
expect( fe1.itemAt( 2 ) ).to.deep.equal( undefined );
250+
expect( () => fe1.itemAt( 2 ) ).to.throw( /^wink-nlp: wink-nlp: 2 is an invalid or out of bounds index./ );
251251
// itemAt() api.
252252
fe1.each( ( e, k ) => {
253253
expect( e.out() ).to.deep.equal( fe1.itemAt( k ).out() );
@@ -260,7 +260,7 @@ describe( 'APIs — A', function () {
260260
expect( fe2.out( its.detail ) ).to.deep.equal( fae2 );
261261
expect( fe2.itemAt( 1 ).out( its.detail ) ).to.deep.equal( fae2[ 1 ] );
262262
expect( fe2.itemAt( 1 ).parentDocument() ).to.deep.equal( doc2 );
263-
expect( fe2.itemAt( 3 ) ).to.deep.equal( undefined );
263+
expect( () => fe2.itemAt( 3 ) ).to.throw( /^wink-nlp: wink-nlp: 3 is an invalid or out of bounds index./ );
264264
fe2.each( ( e, k ) => {
265265
expect( e.out() ).to.deep.equal( fe2.itemAt( k ).out() );
266266
} );

types/index.d.ts

+63-42
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ declare module 'wink-nlp' {
44
// turn off exporting by default since we don't want to expose internal details
55
export { };
66

7+
// *** BEGIN Language Model Specific Declarations ***
8+
// These should always be in sync with the language model's type declarations.
79
// these types are internal details of the implementing model
810
type StemAddon = unknown;
911
type LemmatizeAddon = unknown;
@@ -40,6 +42,7 @@ declare module 'wink-nlp' {
4042
featureFn: FeatureFn;
4143
addons: ModelAddons;
4244
}
45+
// *** END Language Model Specific Declarations ***
4346

4447
// its helpers
4548

@@ -106,25 +109,25 @@ declare module 'wink-nlp' {
106109

107110
// Its
108111
export interface ItsHelpers {
109-
case(index: number, token: Token, cache: Cache): Case;
110-
uniqueId(index: number, token: Token): number;
111-
negationFlag(index: number, token: Token): boolean;
112-
normal(index: number, token: Token, cache: Cache): string;
113-
contractionFlag(index: number, token: Token): boolean;
114-
pos(index: number, token: Token, cache: Cache): PartOfSpeech;
115-
precedingSpaces(index: number, token: Token): string;
116-
prefix(index: number, token: Token, cache: Cache): string;
117-
shape(index: number, token: Token, cache: Cache): string;
118-
stopWordFlag(index: number, token: Token, cache: Cache): boolean;
119-
abbrevFlag(index: number, token: Token, cache: Cache): boolean;
120-
suffix(index: number, token: Token, cache: Cache): string;
121-
type(index: number, token: Token, cache: Cache): string;
122-
value(index: number, token: Token, cache: Cache): string;
123-
stem(index: number, token: Token, cache: Cache, addons: ModelAddons): string;
124-
lemma(index: number, token: Token, cache: Cache, addons: ModelAddons): string;
112+
case(index: number, rdd: RawDocumentData): Case;
113+
uniqueId(index: number, rdd: RawDocumentData): number;
114+
negationFlag(index: number, rdd: RawDocumentData): boolean;
115+
normal(index: number, rdd: RawDocumentData): string;
116+
contractionFlag(index: number, rdd: RawDocumentData): boolean;
117+
pos(index: number, rdd: RawDocumentData): PartOfSpeech;
118+
precedingSpaces(index: number, rdd: RawDocumentData): string;
119+
prefix(index: number, rdd: RawDocumentData): string;
120+
shape(index: number, rdd: RawDocumentData): string;
121+
stopWordFlag(index: number, rdd: RawDocumentData): boolean;
122+
abbrevFlag(index: number, rdd: RawDocumentData): boolean;
123+
suffix(index: number, rdd: RawDocumentData): string;
124+
type(index: number, rdd: RawDocumentData): string;
125+
value(index: number, rdd: RawDocumentData): string;
126+
stem(index: number, rdd: RawDocumentData, addons: ModelAddons): string;
127+
lemma(index: number, rdd: RawDocumentData, addons: ModelAddons): string;
125128
vector(): number[];
126129
detail(): Detail;
127-
markedUpText(index: number, token: Token, cache: Cache): string;
130+
markedUpText(index: number, rdd: RawDocumentData): string;
128131
span(spanItem: number[]): number[];
129132
sentenceWiseImportance(rdd: RawDocumentData): SentenceImportance[];
130133
sentiment(spanItem: number[]): number;
@@ -134,7 +137,7 @@ declare module 'wink-nlp' {
134137
docBOWArray(tf: ModelTermFrequencies): Bow;
135138
bow(tf: ModelTermFrequencies): Bow;
136139
idf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>;
137-
tf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>;
140+
tf(tf: ModelTermFrequencies): Array<[term: string, frequency: number]>;
138141
modelJSON(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): string;
139142
}
140143

@@ -146,6 +149,7 @@ declare module 'wink-nlp' {
146149
freqTable<T>(tokens: T[]): Array<[token: T, freq: number]>;
147150
bigrams<T>(tokens: T[]): Array<[T, T]>;
148151
unique<T>(tokens: T[]): T[];
152+
vector(token: string[]): number[];
149153
}
150154

151155
// functions for use with document
@@ -168,19 +172,19 @@ declare module 'wink-nlp' {
168172
}
169173

170174
export interface SelectedTokens {
171-
each(f: (token: ItemToken) => void): void;
172-
filter(f: (token: ItemToken) => boolean): SelectedTokens;
173-
itemAt(k: number): ItemToken | undefined;
175+
each(cb: ((item: ItemToken) => void) | ((item: ItemToken, index: number) => void)): void;
176+
filter(cb: (item: ItemToken) => boolean): SelectedTokens;
177+
itemAt(k: number): ItemToken;
174178
length(): number;
175179
out(): string[];
176180
out<T>(itsf: ItsFunction<T>): T[] | string[];
177181
out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
178182
}
179183

180184
export interface Tokens {
181-
each(f: (token: ItemToken) => void): void;
182-
filter(f: (token: ItemToken) => boolean): SelectedTokens;
183-
itemAt(k: number): ItemToken | undefined;
185+
each(cb: ((item: ItemToken) => void) | ((item: ItemToken, index: number) => void)): void;
186+
filter(cb: (item: ItemToken) => boolean): SelectedTokens;
187+
itemAt(k: number): ItemToken;
184188
length(): number;
185189
out(): string[];
186190
out<T>(itsf: ItsFunction<T>): T[] | string[];
@@ -189,7 +193,7 @@ declare module 'wink-nlp' {
189193

190194
export interface ItemEntity {
191195
parentDocument(): Document;
192-
markup(beginMarker: string, endMarker: string): void;
196+
markup(beginMarker?: string, endMarker?: string): void;
193197
out(): string;
194198
out<T>(itsf: ItsFunction<T>): T | string;
195199
parentSentence(): ItemSentence;
@@ -198,19 +202,19 @@ declare module 'wink-nlp' {
198202
}
199203

200204
export interface SelectedEntities {
201-
each(f: (entity: ItemEntity) => void): void;
202-
filter(f: (entity: ItemEntity) => boolean): SelectedEntities;
203-
itemAt(k: number): ItemEntity | undefined;
205+
each(cb: ((item: ItemEntity) => void) | ((item: ItemEntity, index: number) => void)): void;
206+
filter(cb: (item: ItemEntity) => boolean): SelectedEntities;
207+
itemAt(k: number): ItemEntity;
204208
length(): number;
205209
out(): string[];
206210
out<T>(itsf: ItsFunction<T>): T[] | string[];
207211
out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
208212
}
209213

210214
export interface Entities {
211-
each(f: (entity: ItemEntity) => void): void;
212-
filter(f: (entity: ItemEntity) => boolean): SelectedEntities;
213-
itemAt(k: number): ItemEntity | undefined;
215+
each(cb: ((item: ItemEntity) => void) | ((item: ItemEntity, index: number) => void)): void;
216+
filter(cb: (item: ItemEntity) => boolean): SelectedEntities;
217+
itemAt(k: number): ItemEntity;
214218
length(): number;
215219
out(): string[];
216220
out<T>(itsf: ItsFunction<T>): T[] | string[];
@@ -219,7 +223,7 @@ declare module 'wink-nlp' {
219223

220224
export interface ItemCustomEntity {
221225
parentDocument(): Document;
222-
markup(beginMarker: string, endMarker: string): void;
226+
markup(beginMarker?: string, endMarker?: string): void;
223227
out(): string;
224228
out<T>(itsf: ItsFunction<T>): T | string;
225229
parentSentence(): ItemSentence;
@@ -228,19 +232,19 @@ declare module 'wink-nlp' {
228232
}
229233

230234
export interface SelectedCustomEntities {
231-
each(f: (entity: ItemCustomEntity) => void): void;
232-
filter(f: (entity: ItemCustomEntity) => boolean): SelectedCustomEntities;
233-
itemAt(k: number): ItemCustomEntity | undefined;
235+
each(cb: ((item: ItemCustomEntity) => void) | ((item: ItemCustomEntity, index: number) => void)): void;
236+
filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities;
237+
itemAt(k: number): ItemCustomEntity;
234238
length(): number;
235239
out(): string[];
236240
out<T>(itsf: ItsFunction<T>): T[] | string[];
237241
out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
238242
}
239243

240244
export interface CustomEntities {
241-
each(f: (entity: ItemCustomEntity) => void): void;
242-
filter(f: (entity: ItemCustomEntity) => boolean): SelectedCustomEntities;
243-
itemAt(k: number): ItemCustomEntity | undefined;
245+
each(cb: ((item: ItemCustomEntity) => void) | ((item: ItemCustomEntity, index: number) => void)): void;
246+
filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities;
247+
itemAt(k: number): ItemCustomEntity;
244248
length(): number;
245249
out(): string[];
246250
out<T>(itsf: ItsFunction<T>): T[] | string[];
@@ -249,7 +253,7 @@ declare module 'wink-nlp' {
249253

250254
export interface ItemSentence {
251255
parentDocument(): Document;
252-
markup(beginMarker: string, endMarker: string): void;
256+
markup(beginMarker?: string, endMarker?: string): void;
253257
out(): string;
254258
out<T>(itsf: ItsFunction<T>): T | string;
255259
entities(): Entities;
@@ -259,8 +263,8 @@ declare module 'wink-nlp' {
259263
}
260264

261265
export interface Sentences {
262-
each(f: (entity: ItemSentence) => void): void;
263-
itemAt(k: number): ItemSentence | undefined;
266+
each(cb: ((item: ItemSentence) => void) | ((item: ItemSentence, index: number) => void)): void;
267+
itemAt(k: number): ItemSentence;
264268
length(): number;
265269
out(): string[];
266270
out<T>(itsf: ItsFunction<T>): T[] | string[];
@@ -277,6 +281,8 @@ declare module 'wink-nlp' {
277281
sentences(): Sentences;
278282
tokens(): Tokens;
279283
printTokens(): void;
284+
pipeConfig(): string[];
285+
contextualVectors(lemma: boolean, specifcWordVectors: string[], similarWordVectors: boolean, wordVectorsLimit: number): string;
280286
}
281287

282288
export interface CerExample {
@@ -295,6 +301,18 @@ declare module 'wink-nlp' {
295301
patterns: string[];
296302
}
297303

304+
// Wink word embeddings structure; should stay in sync with the embedding repo.
305+
interface WordEmbedding {
306+
precision: number;
307+
l2NormIndex: number;
308+
wordIndex: number;
309+
dimensions: number;
310+
unkVector: number[];
311+
size: number;
312+
words: string[];
313+
vectors: Record<string, number[]>;
314+
}
315+
298316
export interface WinkMethods {
299317
readDoc(text: string): Document;
300318
// returns number of learned entities
@@ -303,7 +321,7 @@ declare module 'wink-nlp' {
303321
as: AsHelpers;
304322
}
305323

306-
export default function WinkFn(theModel: Model, pipe?: string[]): WinkMethods;
324+
export default function WinkFn(theModel: Model, pipe?: string[], wordEmbeddings?: WordEmbedding): WinkMethods;
307325
}
308326

309327
declare module 'wink-nlp/utilities/bm25-vectorizer' {
@@ -348,6 +366,9 @@ declare module 'wink-nlp/utilities/similarity' {
348366
tversky<T>(setA: Set<T>, setB: Set<T>, alpha?: number, beta?: number): number;
349367
oo<T>(setA: Set<T>, setB: Set<T>): number;
350368
};
369+
vector: {
370+
cosine(vectorA: number[], vectorB: number[]): number;
371+
};
351372
}
352373

353374
const similarity: SimilarityHelper;

0 commit comments

Comments
 (0)