This repository has been archived by the owner on Jun 21, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcognition.js
353 lines (298 loc) · 12.2 KB
/
cognition.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
require('dotenv').config()
const fetch = require('node-fetch');
const levenshtein = require('fast-levenshtein');
// TODO FIXME Sometimes role + name get caught in the same fragment. Account for that.
const FragmentType = {
UNKNOWN: 0,
ROLE: 1,
NAME: 2,
OTHER: 3,
};
const Role = {
'Graf von Krolock': { synonyms: [], isMainCast: true, singular: true },
'Sarah': { synonyms: [], isMainCast: true, singular: true },
'Alfred': { synonyms: [], isMainCast: true, singular: true },
'Professor Abronsius': { synonyms: ['Prof. Abronsius'], isMainCast: true, singular: true },
'Chagal': { synonyms: [], isMainCast: true, singular: true },
'Magda': { synonyms: [], isMainCast: true, singular: true },
'Herbert': { synonyms: [], isMainCast: true, singular: true },
'Rebecca': { synonyms: [], isMainCast: true, singular: true },
'Koukol': { synonyms: [], isMainCast: true, singular: true },
'Tanzsolisten': { synonyms: ['Solotänzer'], isMainCast: false, singular: false },
'Gesangssolisten': { synonyms: [], isMainCast: false, singular: false },
'Tanzensemble': { synonyms: [], isMainCast: false, singular: false },
'Gesangsensemble': { synonyms: [], isMainCast: false, singular: false },
'Dirigent': { synonyms: [], isMainCast: false, singular: true },
};
const executeOcr = (stream, key) => {
const url = `https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/ocr?language=de&detectOrientation=true`;
const params = {
method: 'POST',
accept: 'application/json',
headers: {
'Content-Type': 'application/octet-stream',
'Ocp-Apim-Subscription-Key': key,
},
body: stream,
};
return fetch(url, params)
.then(response => response.json());
};
const fetchRoleToPersons = () => {
const url = `http://localhost:3001/api/roles`;
const params = {
method: 'GET',
accept: 'application/json',
};
return fetch(url, params)
.then(response => response.json());
};
const explodeBoundingBox = obj => {
const [x, y, width, height] = obj.boundingBox.split(/,/).map(str => +str);
return { x, y, width, height };
};
const mergeWords = words => words.map(word => word.text).join(' ');
const convertFragment = fragment => {
const boundingBox = explodeBoundingBox(fragment);
const text = mergeWords(fragment.words);
return { boundingBox, text };
};
const enrichFragment = fragment => {
return Object.assign({}, fragment, {
type: FragmentType.UNKNOWN,
role: undefined,
});
};
const sortByX = (left, right) => left.boundingBox.x - right.boundingBox.x;
const sortByY = (left, right) => left.boundingBox.y - right.boundingBox.y;
const convertAverageFragmentHeight = fragments => {
const sum = fragments
.map(fragment => fragment.boundingBox.height)
.reduce((a, b) => a + b, 0);
return sum / fragments.length;
};
const getGroupByLineReducer = fragments => {
const averageFragmentHeight = convertAverageFragmentHeight(fragments);
const getBucket = y => Math.floor(y / averageFragmentHeight);
return (accumulated, fragment) => {
let bucket = getBucket(fragment.boundingBox.y);
/* If the bucket doesn't yet exist, but the previous bucket does and has only a single entry,
* check if that other fragment and this one appear to be on roughly the same line. If they
* are, put them together.
* This counteracts the hard cut-off in the calculation of the bucket itself. */
if (!accumulated[bucket] && bucket - 1 >= 0 && accumulated[bucket - 1]
&& accumulated[bucket - 1].length === 1
&& Math.abs(accumulated[bucket - 1][0].boundingBox.y - fragment.boundingBox.y) <= averageFragmentHeight / 2) {
bucket--;
}
(accumulated[bucket] = accumulated[bucket] || []).push(fragment);
return accumulated;
};
};
const flatten = (x, y) => [...x, ...y];
/**
* Returns an array of arrays of objects of the following schema:
*
* {
* boundingBox: { x: Number, y: Number, width: Number, height: Number },
* text: String,
* type: FragmentType,
* role?: String,
* }
*
* The array itself is sorted on the y-axis from top (y = 0) to bottom (y = max).
*
* Each sub-array represents a line after grouping them by their y-value based on
* the average fragment height. The fragments inside the sub-arrays are sorted by
* their x-value.
*/
const convertOcrResponse = response => {
if (!response.regions) {
return [];
}
const fragments = response.regions
/* We are not interested in regions, so remove them. */
.map(region => region.lines)
/* Flatten the resulting array to get an array of fragments. */
.reduce(flatten, [])
/* Transform each fragment into something easier to handle… */
.map(convertFragment)
/* … and add some of our own properties. */
.map(enrichFragment);
const data = [...fragments]
/* Sort by y-value so the following reducer always goes top to bottom. */
.sort(sortByY)
/* Calculate the types of each fragment. */
.map(assignType)
/* Group fragments into lines by buckets. */
.reduce(getGroupByLineReducer(fragments), [])
/* Remove lines that hold no fragment. */
.filter(line => line)
/* Sort fragments within a line by x-value. */
.map(line => [...line].sort(sortByX));
return data
.filter(getHeaderFilter(data));
};
const normalize = str => {
return str
.toLowerCase()
.normalize('NFD')
.replace(/[.\/\\]/, '')
.replace(/[\u0300-\u036f]/g, '');
};
const matches = (a, b) => {
const left = normalize(a);
const right = normalize(b);
return levenshtein.get(left, right) <= 3;
};
const matchesAny = (str, roles) => roles.some(role => matches(str, role));
const assignType = fragment => {
const name = fragment.text;
/* Rule out some noise. */
if (matchesAny(name, ['Tanz der', 'Vampire', 'Musical'])) {
fragment.type = FragmentType.OTHER;
}
Object.keys(Role).forEach(role => {
if (fragment.type) {
return;
}
if (matchesAny(name, [role, ...Role[role].synonyms])) {
fragment.type = FragmentType.ROLE;
fragment.role = role;
}
});
/* If we haven't found anything else, assume it's a name or list of names. */
if (!fragment.type) {
fragment.type = FragmentType.NAME;
}
return fragment;
};
const matchName = (name, candidates) => {
const diffs = candidates
.map(current => {
return { name: current, distance: levenshtein.get(current, name) };
})
.sort((a, b) => a.distance - b.distance);
return diffs[0].name;
};
const matchNames = (str, candidates) => {
return str
.split(/\s*[.,]\s*/)
.filter(name => name && name.trim().length !== 0)
.map(name => matchName(name, candidates))
.filter(name => name);
};
const getHeaderFilter = data => {
const roleLines = data
.filter(line => line.some(fragment => fragment.type === FragmentType.ROLE));
const first = (roleLines.length !== 0 && roleLines[0].length !== 0) ? roleLines[0][0] : null;
return line => {
return !first || line.some(fragment => fragment.boundingBox.y >= first.boundingBox.y);
};
};
const lineToAverageY = line => {
return line
.map(fragment => fragment.boundingBox.y + 0.5 * fragment.boundingBox.height)
.reduce((avg, y, _, all) => avg + y / all.length, 0);
};
/**
* Returns the median spacing on the y-axis between name fragments.
*
*/
const getMedianLineSpacing = data => {
const spaces = data
/* We are only interested in name fragments. */
.map(line => line.filter(fragment => fragment.type === FragmentType.NAME))
/* Filter out lines that no longer contain a fragment. */
.filter(line => line)
/* Average the y-coordinate of the fragments per line. */
.map(lineToAverageY)
/* Now subtract each coordinate from its successor. */
.map((y, i, all) => y - (all[i-1] || y))
/* Remove the first entry as it will always be 0. */
.filter((_, i) => i !== 0)
/* Sort the differences */
.sort((a, b) => a - b);
const middle = Math.floor(spaces.length / 2);
return spaces.length % 2 === 0
? (spaces[middle - 1] + spaces[middle]) / 2
: spaces[middle];
};
// TODO FIXME Refactor this.
const extractCast = (data, roleToPersons) => {
const findMainCast = role => {
const lines = data
.filter(line => line.some(fragment => fragment.role === role));
if (lines.length === 0) {
return [];
}
const roleFragment = lines[0]
.filter(fragment => fragment.role === role)
[0];
return lines[0]
/* We are only interested in names. */
.filter(fragment => fragment.type === FragmentType.NAME)
/* Make sure the name is reasonably close to the role fragment. */
.filter(fragment => {
return fragment.boundingBox.x <= roleFragment.boundingBox.y + 1.5 * roleFragment.boundingBox.width;
});
};
const findSecondaryCast = role => {
const secondaryRoles = Object.keys(Role).filter(role => !Role[role].isMainCast);
const medianLineSpacing = getMedianLineSpacing(data);
let foundRoleFragment = false;
let foundNextRoleFragment = false;
let exceededSpacing = false;
const lines = data
/* First we filter lines between role fragments as much as we can. */
.filter(line => {
foundNextRoleFragment |= foundRoleFragment
&& line.some(fragment => fragment.role && secondaryRoles.includes(fragment.role));
foundRoleFragment |= line.some(fragment => fragment.role === role);
return foundRoleFragment && !foundNextRoleFragment;
})
/* Now we also pay attention to line spacing as the next role fragment may not have been recognized. */
.filter((line, i, all) => {
/* We always take the first two lines since it includes the role fragment, but sometimes
* the conductor appears on the same line so we need to keep it. For the other cases the
* second line is the first line of name fragments. */
if (i === 0 || i === 1) {
return true;
}
const last = lineToAverageY(all[i - 1]);
const current = lineToAverageY(line);
exceededSpacing |= Math.abs(last - current) > 2 * medianLineSpacing;
return !exceededSpacing;
});
if (lines.length === 0) {
return [];
}
const roleFragment = lines[0]
.filter(fragment => fragment.role === role)
[0];
return lines
.reduce(flatten, [])
.filter(fragment => fragment.type === FragmentType.NAME)
/* Make sure to only catch fragments reasonably close to the role fragment. */
.filter(fragment => {
const centerRoleFragment = roleFragment.boundingBox.x + roleFragment.boundingBox.width / 2;
const centerFragment = fragment.boundingBox.x + fragment.boundingBox.width / 2;
return Math.abs(centerRoleFragment - centerFragment) <= 1.5 * roleFragment.boundingBox.width;
});
};
return Object.keys(Role).map(role => {
const candidates = roleToPersons
.filter(obj => obj.role === role)
[0]
.persons
.map(person => person.name);
const nameFragments = Role[role].isMainCast ? findMainCast(role) : findSecondaryCast(role);
const names = nameFragments
.map(fragment => matchNames(fragment.text, candidates))
.reduce(flatten, [])
.filter((name, i, all) => all.indexOf(name) === i)
.filter((name, i) => !Role[role].singular || i === 0 );
return { role, names };
});
};
module.exports = { executeOcr, fetchRoleToPersons, convertOcrResponse, extractCast };