Skip to content

Commit

Permalink
RTL workaround for brat visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
spyysalo committed Sep 30, 2014
1 parent ccd95dc commit 166a7b6
Showing 1 changed file with 65 additions and 5 deletions.
70 changes: 65 additions & 5 deletions conllu.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

/*
CoNLL-U format library for JavaScript.
Home:http://github.com/spyysalo/conllu.js
Home: http://github.com/spyysalo/conllu.js
Format: http://universaldependencies.github.io/docs/format.html
Author: Sampo Pyysalo
Expand Down Expand Up @@ -288,11 +288,39 @@ var ConllU = (function(window, undefined) {
});
};

// return the text of the sentence for visualization with brat
Sentence.prototype.bratText = function() {
// Return the words of the sentence, adjusted for visualization with
// brat: each word whose form consists solely of right-to-left
// characters (as judged by isRtl) is replaced, in the array obtained
// from this.words(), by a deep copy whose form has been passed
// through rtlFix. All other entries are returned untouched.
// NOTE(review): deepCopy yields plain data objects, so replaced
// entries presumably lose any prototype methods -- confirm callers
// only read data fields.
Sentence.prototype.bratWords = function() {
    var result = this.words();
    var idx, original, patched;

    for (idx = 0; idx < result.length; idx += 1) {
        original = result[idx];

        // non-RTL words need no workaround; keep the original object
        if (!isRtl(original.form)) {
            continue;
        }

        // copy first so the word's own form is not modified
        patched = deepCopy(original);
        patched.form = rtlFix(patched.form);
        result[idx] = patched;
    }

    return result;
};

// Return the tokens of the sentence, adjusted for visualization with
// brat: every entry of the array obtained from this.tokens() is
// replaced by a deep copy whose form has been passed through rtlFix
// (which only alters forms that are entirely right-to-left). Note
// that entries are copied whether or not their form is RTL.
Sentence.prototype.bratTokens = function() {
    var result = this.tokens();
    var idx, patched;

    for (idx = 0; idx < result.length; idx += 1) {
        // copy first so the token's own form is not modified
        patched = deepCopy(result[idx]);
        patched.form = rtlFix(patched.form);
        result[idx] = patched;
    }

    return result;
};

// return the text of the sentence for visualization with brat
Sentence.prototype.bratText = function() {
var words = this.bratWords();
var tokens = this.bratTokens();

var wordText = words.map(function(w) { return w.form }).join(' ');
var tokenText = tokens.map(function(w) { return w.form }).join(' ');

Expand All @@ -311,7 +339,7 @@ var ConllU = (function(window, undefined) {
offset = this.baseOffset;

// create an annotation for each word
var words = this.words();
var words = this.bratWords();
for (var i=0; i<words.length; i++) {
var length = words[i].form.length;
spans.push([this.id+'-T'+words[i].id, words[i].cpostag,
Expand Down Expand Up @@ -1179,9 +1207,41 @@ var ConllU = (function(window, undefined) {
};

/*
 * Logger that ignores the given message and always returns null.
 * NOTE(review): presumably used where log output is to be
 * discarded -- confirm against callers.
 */
var nullLogger = function(message) {
    return null;
};

/*
 * Return true iff given string only contains characters from a
 * right-to-left Unicode block and is not empty.
 *
 * Any non-string argument (including null and undefined, e.g. a
 * missing form) yields false rather than raising a TypeError.
 */
var isRtl = function(s) {
    // range from http://stackoverflow.com/a/14824756
    var rtlOnly = /^[\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC]+$/;

    // the typeof guard keeps non-strings from being coerced by test()
    return typeof s === 'string' && rtlOnly.test(s);
};

/*
 * Work around issues in brat rendering of right-to-left text
 * (https://github.com/UniversalDependencies/docs/issues/52).
 *
 * If the given token text is entirely right-to-left (per isRtl),
 * return it wrapped in U+02D1 on both sides; otherwise return it
 * unchanged.
 */
var rtlFix = function(s) {
    // nothing to do for text that is not purely right-to-left
    if (!isRtl(s)) {
        return s;
    }

    var prefix = '\u02D1';
    var suffix = '\u02D1';

    return prefix + s + suffix;
};

/*
 * Produce a deep copy of the given object by serializing it to JSON
 * and parsing the result back. Note: this is not particularly
 * efficient, and only JSON-serializable content survives the round
 * trip, so all fields must be serializable for the copy to be
 * faithful.
 */
var deepCopy = function(o) {
    var serialized = JSON.stringify(o);

    return JSON.parse(serialized);
};

/*
* Regular expressions for various parts of the format.
* See https://github.com/UniversalDependencies/docs/issues/33
Expand Down

0 comments on commit 166a7b6

Please sign in to comment.