/**
 * Builds the ATN fragment for a lexer character range {@code a..b}: one entry state, one exit
 * state, and a single {@link RangeTransition} between them covering the two literal values.
 *
 * @param a AST node holding the lower-bound char literal
 * @param b AST node holding the upper-bound char literal
 * @return handle whose left side is the entry state and whose right side is the exit state
 */
@Override
public Handle range(GrammarAST a, GrammarAST b) {
  // States are created in this order (a first, then b); keep it so state creation order is stable.
  ATNState entry = newState(a);
  ATNState exit = newState(b);

  int low = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
  int high = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
  RangeTransition edge = new RangeTransition(exit, low, high);
  entry.addTransition(edge);

  // Both endpoint nodes are associated with the entry state of the fragment.
  a.atnState = entry;
  b.atnState = entry;
  return new Handle(entry, exit);
}
/** * Given a token type, get a meaningful name for it such as the ID or string literal. If this is a * lexer and the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped) char * literal. */ public String getTokenDisplayName(int ttype) { // inside any target's char range and is lexer grammar? if (isLexer() && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE) { return CharSupport.getANTLRCharLiteralForChar(ttype); } if (ttype == Token.EOF) { return "EOF"; } if (ttype == Token.INVALID_TYPE) { return INVALID_TOKEN_NAME; } if (ttype >= 0 && ttype < typeToStringLiteralList.size() && typeToStringLiteralList.get(ttype) != null) { return typeToStringLiteralList.get(ttype); } if (ttype >= 0 && ttype < typeToTokenList.size() && typeToTokenList.get(ttype) != null) { return typeToTokenList.get(ttype); } return String.valueOf(ttype); }
public IntervalSet getSetFromCharSetLiteral(GrammarAST charSetAST) { String chars = charSetAST.getText(); chars = chars.substring(1, chars.length() - 1); String cset = '"' + chars + '"'; IntervalSet set = new IntervalSet(); // unescape all valid escape char like \n, leaving escaped dashes as '\-' // so we can avoid seeing them as '-' range ops. chars = CharSupport.getStringFromGrammarStringLiteral(cset); // now make x-y become set of char int n = chars.length(); for (int i = 0; i < n; i++) { int c = chars.charAt(i); if (c == '\\' && (i + 1) < n && chars.charAt(i + 1) == '-') { // \- set.add('-'); i++; } else if ((i + 2) < n && chars.charAt(i + 1) == '-') { // range x-y int x = c; int y = chars.charAt(i + 2); if (x <= y) set.add(x, y); i += 2; } else { set.add(c); } } return set; }
@Override public Handle lexerCallCommand(GrammarAST ID, GrammarAST arg) { LexerAction lexerAction = createLexerAction(ID, arg); if (lexerAction != null) { return action(ID, lexerAction); } // fall back to standard action generation for the command ST cmdST = codegenTemplates.getInstanceOf("Lexer" + CharSupport.capitalize(ID.getText()) + "Command"); if (cmdST == null) { g.tool.errMgr.grammarError( ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText()); return epsilon(ID); } if (cmdST.impl.formalArguments == null || !cmdST.impl.formalArguments.containsKey("arg")) { g.tool.errMgr.grammarError( ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText()); return epsilon(ID); } cmdST.add("arg", arg.getText()); cmdST.add("grammar", arg.g); return action(cmdST.render()); }
/**
 * Resolves the token type for an atom node.
 *
 * @param atom node whose text is a char literal (lexer grammar) or a token reference/literal
 *     known to the grammar (otherwise)
 * @return for a lexer grammar, the char value of the literal; otherwise the type the grammar
 *     reports for the node text
 */
protected int getTokenType(@NotNull GrammarAST atom) {
  return g.isLexer()
      ? CharSupport.getCharValueFromGrammarCharLiteral(atom.getText())
      : g.getTokenType(atom.getText());
}
/**
 * Builds the ATN fragment for a lexer set (optionally inverted) from its alternatives.
 *
 * <p>Each alternative contributes to one {@link IntervalSet}:
 *
 * <ul>
 *   <li>{@code RANGE}: the interval between its two char-literal children;
 *   <li>{@code LEXER_CHAR_SET}: every character of the bracketed set;
 *   <li>{@code STRING_LITERAL}: the single character it denotes;
 *   <li>{@code TOKEN_REF}: not supported in a lexer set and reported as a grammar error.
 * </ul>
 *
 * <p>A char literal that converts to {@code -1} is treated as invalid: it is reported with
 * {@code INVALID_LITERAL_IN_LEXER_SET} and contributes nothing. This applies to range endpoints
 * as well as to stand-alone literals, so an invalid endpoint can no longer put {@code -1} into
 * the set.
 *
 * @param associatedAST node the new states and the fragment are associated with
 * @param alts the set's alternatives
 * @param invert whether the set is negated
 * @return handle spanning the entry and exit states of the fragment
 */
@Override
public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert) {
  ATNState left = newState(associatedAST);
  ATNState right = newState(associatedAST);
  IntervalSet set = new IntervalSet();

  for (GrammarAST t : alts) {
    if (t.getType() == ANTLRParser.RANGE) {
      String lowText = t.getChild(0).getText();
      String highText = t.getChild(1).getText();
      int a = CharSupport.getCharValueFromGrammarCharLiteral(lowText);
      int b = CharSupport.getCharValueFromGrammarCharLiteral(highText);

      // Same validity rule as the STRING_LITERAL branch below: -1 means the literal could not
      // be converted to a single character.
      boolean endpointsValid = true;
      if (a == -1) {
        endpointsValid = false;
        g.tool.errMgr.grammarError(
            ErrorType.INVALID_LITERAL_IN_LEXER_SET, g.fileName, t.getToken(), lowText);
      }
      if (b == -1) {
        endpointsValid = false;
        g.tool.errMgr.grammarError(
            ErrorType.INVALID_LITERAL_IN_LEXER_SET, g.fileName, t.getToken(), highText);
      }
      if (endpointsValid) {
        set.add(a, b);
      }
    } else if (t.getType() == ANTLRParser.LEXER_CHAR_SET) {
      set.addAll(getSetFromCharSetLiteral(t));
    } else if (t.getType() == ANTLRParser.STRING_LITERAL) {
      int c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
      if (c != -1) {
        set.add(c);
      } else {
        g.tool.errMgr.grammarError(
            ErrorType.INVALID_LITERAL_IN_LEXER_SET, g.fileName, t.getToken(), t.getText());
      }
    } else if (t.getType() == ANTLRParser.TOKEN_REF) {
      g.tool.errMgr.grammarError(
          ErrorType.UNSUPPORTED_REFERENCE_IN_LEXER_SET, g.fileName, t.getToken(), t.getText());
    }
  }

  if (invert) {
    left.addTransition(new NotSetTransition(right, set));
  } else {
    Transition transition;
    if (set.getIntervals().size() == 1) {
      // A single contiguous interval is expressed as a range transition instead of a set.
      Interval interval = set.getIntervals().get(0);
      transition = new RangeTransition(right, interval.a, interval.b);
    } else {
      transition = new SetTransition(right, set);
    }
    left.addTransition(transition);
  }

  associatedAST.atnState = left;
  return new Handle(left, right);
}
/**
 * For a lexer, a string is a sequence of char to match. That is, "fog" is treated as 'f' 'o' 'g'
 * not as a single transition in the DFA. Machine== o-'f'-&gt;o-'o'-&gt;o-'g'-&gt;o and has n+1
 * states for n characters.
 *
 * <p>For an empty literal (n == 0) the fragment is the single start state, so the returned
 * handle has that state as both its left and right side rather than a {@code null} right side.
 *
 * @param stringLiteralAST node whose text is the quoted string literal
 * @return handle from the first state to the state reached after the last character
 */
@Override
public Handle stringLiteral(TerminalAST stringLiteralAST) {
  String chars = CharSupport.getStringFromGrammarStringLiteral(stringLiteralAST.getText());
  int n = chars.length();

  ATNState left = newState(stringLiteralAST);
  // 'last' always names the end of the chain built so far; it starts at 'left' so that an
  // empty literal still yields a well-formed handle.
  ATNState last = left;
  for (int i = 0; i < n; i++) {
    ATNState next = newState(stringLiteralAST);
    last.addTransition(new AtomTransition(next, chars.charAt(i)));
    last = next;
  }

  stringLiteralAST.atnState = left;
  return new Handle(left, last);
}
/** * Given a token type, get a meaningful name for it such as the ID or string literal. If this is a * lexer and the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped) char * literal. */ public String getTokenDisplayName(int ttype) { String tokenName = null; // inside any target's char range and is lexer grammar? if (isLexer() && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE) { return CharSupport.getANTLRCharLiteralForChar(ttype); } else if (ttype == Token.EOF) { tokenName = "EOF"; } else { if (ttype < typeToTokenList.size()) { tokenName = typeToTokenList.get(ttype); if (tokenName != null && tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) && ttype < typeToStringLiteralList.size() && typeToStringLiteralList.get(ttype) != null) { tokenName = typeToStringLiteralList.get(ttype); } } else { tokenName = String.valueOf(ttype); } } // tool.log("grammar", "getTokenDisplayName ttype="+ttype+", name="+tokenName); return tokenName; }