Skip to content

Commit d22c991

Browse files
committed
Support Antlr Lexer Modes
1 parent d636274 commit d22c991

File tree

12 files changed

+294
-86
lines changed

12 files changed

+294
-86
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package org.fife.ui.rsyntaxtextarea.modes.antlr;
2+
3+
import org.antlr.v4.runtime.ANTLRErrorListener;
4+
import org.antlr.v4.runtime.Parser;
5+
import org.antlr.v4.runtime.RecognitionException;
6+
import org.antlr.v4.runtime.Recognizer;
7+
import org.antlr.v4.runtime.atn.ATNConfigSet;
8+
import org.antlr.v4.runtime.dfa.DFA;
9+
10+
import java.util.BitSet;
11+
12+
/**
13+
* A {@link ANTLRErrorListener} that throws a RuntimeException for every error.
14+
*
15+
* @author Markus Heberling
16+
*/
17+
class AlwaysThrowingErrorListener implements ANTLRErrorListener {
18+
@Override
19+
public void syntaxError(
20+
Recognizer<?, ?> recognizer,
21+
Object offendingSymbol,
22+
int line,
23+
int charPositionInLine,
24+
String msg,
25+
RecognitionException e) {
26+
throw new AntlrException();
27+
}
28+
29+
@Override
30+
public void reportAmbiguity(
31+
Parser recognizer,
32+
DFA dfa,
33+
int startIndex,
34+
int stopIndex,
35+
boolean exact,
36+
BitSet ambigAlts,
37+
ATNConfigSet configs) {
38+
throw new AntlrException();
39+
}
40+
41+
@Override
42+
public void reportAttemptingFullContext(
43+
Parser recognizer,
44+
DFA dfa,
45+
int startIndex,
46+
int stopIndex,
47+
BitSet conflictingAlts,
48+
ATNConfigSet configs) {
49+
throw new AntlrException();
50+
}
51+
52+
@Override
53+
public void reportContextSensitivity(
54+
Parser recognizer,
55+
DFA dfa,
56+
int startIndex,
57+
int stopIndex,
58+
int prediction,
59+
ATNConfigSet configs) {
60+
throw new AntlrException();
61+
}
62+
63+
static class AntlrException extends RuntimeException {
64+
}
65+
}
Lines changed: 109 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
11
package org.fife.ui.rsyntaxtextarea.modes.antlr;
22

3-
import org.antlr.v4.runtime.*;
4-
import org.antlr.v4.runtime.atn.ATNConfigSet;
5-
import org.antlr.v4.runtime.dfa.DFA;
3+
import org.antlr.v4.runtime.CommonToken;
4+
import org.antlr.v4.runtime.Lexer;
65
import org.fife.ui.rsyntaxtextarea.Token;
76
import org.fife.ui.rsyntaxtextarea.TokenMakerBase;
87

98
import javax.swing.text.Segment;
109
import java.util.Arrays;
11-
import java.util.BitSet;
1210
import java.util.List;
11+
import java.util.Optional;
1312

13+
/**
14+
* A {@link org.fife.ui.rsyntaxtextarea.TokenMaker} that converts {@link CommonToken} to {@link Token}.
15+
*
16+
* @author Markus Heberling
17+
*/
1418
public abstract class AntlrTokenMaker extends TokenMakerBase {
1519

20+
private final ModeInfoManager modeInfoManager = new ModeInfoManager();
21+
1622
private final List<MultiLineTokenInfo> multiLineTokenInfos;
1723

1824
protected AntlrTokenMaker(MultiLineTokenInfo... multiLineTokenInfos) {
@@ -25,6 +31,8 @@ public int getClosestStandardTokenTypeForInternalType(int type) {
2531
if (type == CommonToken.INVALID_TYPE) {
2632
// mark as error
2733
return Token.ERROR_IDENTIFIER;
34+
} else if (type < 0) {
35+
return modeInfoManager.getModeInfo(type).tokenType;
2836
} else {
2937
return convertType(type);
3038
}
@@ -35,26 +43,30 @@ public int getClosestStandardTokenTypeForInternalType(int type) {
3543
public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
3644
String line = text.toString();
3745
resetTokenList();
38-
MultiLineTokenInfo initialMultiLineTokenInfo = getMultiLineTokenInfo(getLanguageIndex(), initialTokenType);
39-
String multilineTokenStart = initialMultiLineTokenInfo == null ? null : initialMultiLineTokenInfo.tokenStart;
40-
if (initialMultiLineTokenInfo != null) {
46+
47+
// the modes to push
48+
ModeInfoManager.ModeInfo modeInfo = modeInfoManager.getModeInfo(initialTokenType);
49+
// we need to set it, so that the correct multiline token can be found
50+
setLanguageIndex(modeInfo.currentMode);
51+
52+
String multilineTokenStart = getMultilineTokenStart(modeInfo);
53+
if (multilineTokenStart != null) {
4154
// we are inside a multi line token, so prefix the text with the token start
4255
line = multilineTokenStart + line;
4356
}
4457

4558
// check if we have a multi line token start without an end
46-
String multilineTokenEnd = null;
47-
for (MultiLineTokenInfo info : multiLineTokenInfos) {
48-
int tokenStartPos = line.indexOf(info.tokenStart);
49-
if (tokenStartPos > -1 && line.indexOf(info.tokenEnd, tokenStartPos + info.tokenStart.length()) == -1) {
50-
//we are in the middle of a multi line token, we need to end it so the lexer can recognize it
51-
multilineTokenEnd = info.tokenEnd;
52-
line += multilineTokenEnd;
53-
break;
54-
}
59+
String multilineTokenEnd = getMultilineTokenEnd(line);
60+
if (multilineTokenEnd != null) {
61+
line += multilineTokenEnd;
5562
}
5663

5764
Lexer lexer = createLexer(line);
65+
for (int mode : modeInfo.modeStack.toArray()) {
66+
// push the modes into the lexer, so it knows where it is
67+
lexer.pushMode(mode);
68+
}
69+
lexer.mode(modeInfo.currentMode);
5870
lexer.removeErrorListeners();
5971
lexer.addErrorListener(new AlwaysThrowingErrorListener());
6072

@@ -64,46 +76,42 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
6476
try {
6577
while (true) {
6678
org.antlr.v4.runtime.Token at = lexer.nextToken();
79+
setLanguageIndex(lexer._mode);
6780
if (at.getType() == CommonToken.EOF) {
6881
if (multilineTokenEnd == null) {
6982
addNullToken();
7083
}
7184
break;
7285
} else {
73-
int end = currentArrayOffset + at.getText().length() - 1;
74-
if (initialMultiLineTokenInfo != null && multilineTokenStart != null
75-
&& at.getText().startsWith(multilineTokenStart)) {
76-
// need to subtract our inserted token start
77-
end -= multilineTokenStart.length();
78-
}
79-
if (multilineTokenEnd != null &&
80-
at.getText().endsWith(multilineTokenEnd)) {
81-
//need to subtract our inserted token end
82-
end -= multilineTokenEnd.length();
83-
}
84-
addToken(text, currentArrayOffset, end, getClosestStandardTokenTypeForInternalType(at.getType()),
85-
currentDocumentOffset);
86+
addToken(
87+
text,
88+
currentArrayOffset,
89+
currentDocumentOffset,
90+
multilineTokenStart,
91+
multilineTokenEnd,
92+
at);
8693
// update from current token
8794
currentArrayOffset = currentToken.textOffset + currentToken.textCount;
8895
currentDocumentOffset = currentToken.getEndOffset();
8996
}
9097
}
91-
} catch (AntlrException exceptionInstanceNotNeeded) {
98+
} catch (AlwaysThrowingErrorListener.AntlrException exceptionInstanceNotNeeded) {
9299
// mark the rest of the line as error
93-
final String remainingText = String.valueOf(text.array, currentArrayOffset,
94-
text.offset - currentArrayOffset + text.count);
95-
int type;
96-
97-
if (initialMultiLineTokenInfo != null) {
98-
type = initialMultiLineTokenInfo.token;
99-
} else {
100-
type = Token.ERROR_IDENTIFIER;
101-
}
100+
final String remainingText =
101+
String.valueOf(
102+
text.array, currentArrayOffset, text.offset - currentArrayOffset + text.count);
102103

103-
addToken(text, currentArrayOffset, currentArrayOffset + remainingText.length() - 1, type, currentDocumentOffset);
104+
int type = multilineTokenStart != null ? modeInfo.tokenType : Token.ERROR_IDENTIFIER;
104105

105-
if (initialMultiLineTokenInfo == null) {
106-
//we are not in a multiline token, so we assume the line ends here
106+
addToken(
107+
text,
108+
currentArrayOffset,
109+
currentArrayOffset + remainingText.length() - 1,
110+
type,
111+
currentDocumentOffset);
112+
113+
if (multilineTokenStart == null) {
114+
// we are not in a multiline token, so we assume the line ends here
107115
addNullToken();
108116
}
109117
}
@@ -114,63 +122,78 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
114122
}
115123

116124
if (firstToken.getType() == Token.NULL && firstToken == currentToken) {
117-
//empty line, copy type from last line
118-
firstToken.setType(initialTokenType);
125+
// empty line, copy type from last line
126+
firstToken.setType(modeInfo.tokenType);
119127
firstToken.text = new char[0];
120128
firstToken.textCount = 0;
121129
}
122-
return firstToken;
123-
}
124-
125-
private MultiLineTokenInfo getMultiLineTokenInfo(int languageIndex, int token) {
126-
return multiLineTokenInfos.stream().filter(i -> i.languageIndex == languageIndex).filter(i -> i.token == token).findFirst().orElse(null);
127-
}
128-
129-
protected abstract Lexer createLexer(String text);
130130

131-
protected static class MultiLineTokenInfo {
132-
private final int languageIndex;
133-
134-
private final int token;
135-
136-
private final String tokenStart;
137-
138-
private final String tokenEnd;
139-
140-
public MultiLineTokenInfo(int languageIndex, int token, String tokenStart, String tokenEnd) {
141-
this.languageIndex = languageIndex;
142-
this.token = token;
143-
this.tokenStart = tokenStart;
144-
this.tokenEnd = tokenEnd;
131+
if (!lexer._modeStack.isEmpty() || lexer._mode != Lexer.DEFAULT_MODE) {
132+
currentToken.setType(
133+
modeInfoManager.storeModeInfo(currentToken.getType(), lexer._mode, lexer._modeStack));
145134
}
135+
136+
return firstToken;
146137
}
147138

148-
/**
149-
* A {@link ANTLRErrorListener} that throws a RuntimeException for every error
150-
*/
151-
private static class AlwaysThrowingErrorListener implements ANTLRErrorListener {
152-
@Override
153-
public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
154-
throw new AntlrException();
155-
}
139+
private void addToken(
140+
Segment text,
141+
int start,
142+
int startOffset,
143+
String multilineTokenStart,
144+
String multilineTokenEnd,
145+
org.antlr.v4.runtime.Token at) {
146+
addToken(
147+
text,
148+
start,
149+
calculateTokenEnd(multilineTokenStart, multilineTokenEnd, start, at),
150+
getClosestStandardTokenTypeForInternalType(at.getType()),
151+
startOffset);
152+
}
156153

157-
@Override
158-
public void reportAmbiguity(Parser recognizer, DFA dfa, int startIndex, int stopIndex, boolean exact, BitSet ambigAlts, ATNConfigSet configs) {
159-
throw new AntlrException();
154+
private int calculateTokenEnd(
155+
String multilineTokenStart,
156+
String multilineTokenEnd,
157+
int currentArrayOffset,
158+
org.antlr.v4.runtime.Token at) {
159+
int end = currentArrayOffset + at.getText().length() - 1;
160+
if (multilineTokenStart != null && at.getText().startsWith(multilineTokenStart)) {
161+
// need to subtract our inserted token start
162+
end -= multilineTokenStart.length();
160163
}
161-
162-
@Override
163-
public void reportAttemptingFullContext(Parser recognizer, DFA dfa, int startIndex, int stopIndex, BitSet conflictingAlts, ATNConfigSet configs) {
164-
throw new AntlrException();
164+
if (multilineTokenEnd != null && at.getText().endsWith(multilineTokenEnd)) {
165+
// need to subtract our inserted token end
166+
end -= multilineTokenEnd.length();
165167
}
168+
return end;
169+
}
166170

167-
@Override
168-
public void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, int stopIndex, int prediction, ATNConfigSet configs) {
169-
throw new AntlrException();
170-
}
171+
private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) {
172+
return getMultiLineTokenInfo(getLanguageIndex(), modeInfo.tokenType)
173+
.map(i -> i.tokenStart)
174+
.orElse(null);
171175
}
172176

173-
private static class AntlrException extends RuntimeException {
177+
private String getMultilineTokenEnd(String line) {
178+
return multiLineTokenInfos.stream()
179+
// the language index matches our current language
180+
.filter(i -> i.languageIndex == getLanguageIndex())
181+
// the line contains the token start
182+
.filter(i -> line.contains(i.tokenStart))
183+
// the line doesn't contain the token end after the token start
184+
.filter(
185+
i -> line.indexOf(i.tokenEnd, line.indexOf(i.tokenStart) + i.tokenStart.length()) == -1)
186+
.map(i -> i.tokenEnd)
187+
.findFirst()
188+
.orElse(null);
189+
}
174190

191+
private Optional<MultiLineTokenInfo> getMultiLineTokenInfo(int languageIndex, int token) {
192+
return multiLineTokenInfos.stream()
193+
.filter(i -> i.languageIndex == languageIndex)
194+
.filter(i -> i.token == token)
195+
.findFirst();
175196
}
197+
198+
protected abstract Lexer createLexer(String text);
176199
}

0 commit comments

Comments
 (0)