@@ -193,6 +193,7 @@ as.vector = function ( tokens, rdd ) {
  const size = rdd.wordVectors.dimensions;
  const precision = rdd.wordVectors.precision;
  const vectors = rdd.wordVectors.vectors;
+  const l2NormIndex = rdd.wordVectors.l2NormIndex;

  // Set up a new initialized vector of `size`
  const v = new Array( size );
@@ -203,8 +204,11 @@ as.vector = function ( tokens, rdd ) {
  for ( let i = 0; i < tokens.length; i += 1 ) {
    // Extract token vector for the current token.
    const tv = vectors[ tokens[ i ].toLowerCase() ];
-    // Increment `numOfTokens` if the above operation was successful.
-    if ( tv !== undefined ) numOfTokens += 1;
207
+ // Increment `numOfTokens` if the above operation was successful
208
+ // AND l2Norm is non-zero, because for UNK vectors it is set to 0.
209
+ // The later is applicable for the contextual vectors, where in event
210
+ // of UNK, an all zero vectors is set for UNK word.
211
+ if ( tv !== undefined && tv [ l2NormIndex ] !== 0 ) numOfTokens += 1 ;
    for ( let j = 0; j < size; j += 1 ) {
      // Keep summing; eventually it will be divided by `numOfTokens` to obtain the average.
      v[ j ] += ( tv === undefined ) ? 0 : tv[ j ];
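For context, the sketch below (plain JavaScript, not the library's actual API) illustrates the averaging behaviour this change implements: a token whose vector is missing, or whose stored l2 norm is 0 (the all-zero UNK case), is excluded from the denominator while the remaining vectors are summed and averaged. The `averageVector` helper, the sample `vectors` table, the placement of the l2 norm at index `size`, and the final division with its zero guard are all assumptions made for illustration.

// A hedged sketch: `averageVector`, the sample `vectors` data, and the
// assumption that the l2 norm is stored at index `size` are hypothetical.
const size = 3;
const l2NormIndex = size;
const vectors = {
  hello: [ 0.1, 0.2, 0.3, 0.374 ],  // last entry plays the role of the l2 norm
  world: [ 0.3, 0.0, 0.4, 0.5 ],
  unk:   [ 0.0, 0.0, 0.0, 0.0 ]     // UNK: all-zero vector, so its l2 norm is 0
};

function averageVector( tokens ) {
  const v = new Array( size ).fill( 0 );
  let numOfTokens = 0;
  for ( let i = 0; i < tokens.length; i += 1 ) {
    const tv = vectors[ tokens[ i ].toLowerCase() ];
    // Count only tokens that have a real (non-UNK) vector.
    if ( tv !== undefined && tv[ l2NormIndex ] !== 0 ) numOfTokens += 1;
    for ( let j = 0; j < size; j += 1 ) {
      v[ j ] += ( tv === undefined ) ? 0 : tv[ j ];
    }
  }
  // Divide by the number of counted tokens; guard against division by zero.
  for ( let j = 0; j < size; j += 1 ) v[ j ] /= ( numOfTokens || 1 );
  return v;
}

console.log( averageVector( [ 'Hello', 'unk', 'World' ] ) );
// → approximately [ 0.2, 0.1, 0.35 ]; only the 2 non-UNK tokens enter the denominator.

Summing the all-zero UNK vector is harmless; the fix is purely about not counting it in `numOfTokens`, which would otherwise inflate the denominator and drag the averaged vector towards zero whenever UNK tokens are present.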