Ejemplo n.º 1
0
 /**
  * Builds the ATN fragment for a character range written as two char literals.
  *
  * <p>A fresh state is created for each operand and a single {@link RangeTransition} links the
  * first state to the second, labelled with the character values decoded from the text of
  * {@code a} and {@code b} (in that order). Both AST nodes are then associated with the first
  * (start) state.
  *
  * @param a AST node whose text is the char literal for the first endpoint of the range
  * @param b AST node whose text is the char literal for the second endpoint of the range
  * @return a handle whose left side is the start state and whose right side is the stop state
  */
 @Override
 public Handle range(GrammarAST a, GrammarAST b) {
   ATNState start = newState(a);
   ATNState stop = newState(b);

   int from = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
   int to = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
   RangeTransition edge = new RangeTransition(stop, from, to);
   start.addTransition(edge);

   // Both operands point back at the state that owns the range transition.
   a.atnState = start;
   b.atnState = start;

   return new Handle(start, stop);
 }
Ejemplo n.º 2
0
  /**
   * Returns a human-readable name for a token type.
   *
   * <p>Lookup order: for a lexer grammar with {@code ttype} inside the char vocabulary, an
   * ANTLR-valid (possibly escaped) char literal; then the fixed names for {@code Token.EOF} and
   * {@code Token.INVALID_TYPE}; then the string literal registered for the type; then the token
   * name registered for the type; and finally the decimal value of {@code ttype} itself.
   *
   * @param ttype the token type to describe
   * @return the display name chosen by the lookup order above
   */
  public String getTokenDisplayName(int ttype) {
    // inside any target's char range and is lexer grammar?
    boolean charInLexerVocabulary =
        isLexer() && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE;
    if (charInLexerVocabulary) {
      return CharSupport.getANTLRCharLiteralForChar(ttype);
    }

    if (ttype == Token.EOF) {
      return "EOF";
    }
    if (ttype == Token.INVALID_TYPE) {
      return INVALID_TOKEN_NAME;
    }

    // Only non-negative types can index the lookup tables.
    if (ttype >= 0) {
      // A string literal, when one exists, wins over the symbolic token name.
      if (ttype < typeToStringLiteralList.size()) {
        String literal = typeToStringLiteralList.get(ttype);
        if (literal != null) {
          return literal;
        }
      }

      if (ttype < typeToTokenList.size()) {
        String symbolicName = typeToTokenList.get(ttype);
        if (symbolicName != null) {
          return symbolicName;
        }
      }
    }

    // Nothing registered for this type: show the raw number.
    return String.valueOf(ttype);
  }
Ejemplo n.º 3
0
  /**
   * Converts a lexer char-set literal into the set of characters it denotes.
   *
   * <p>The first and last characters of the node text (the surrounding delimiters) are dropped,
   * the remainder is unescaped via {@code CharSupport.getStringFromGrammarStringLiteral}, and
   * the result is scanned left to right:
   *
   * <ul>
   *   <li>{@code \-} contributes a literal dash;
   *   <li>{@code x-y} contributes the range from {@code x} to {@code y} when {@code x <= y} and
   *       contributes nothing otherwise;
   *   <li>any other character contributes itself.
   * </ul>
   *
   * @param charSetAST AST node whose text is the delimited char-set literal
   * @return the set of characters described by the literal
   */
  public IntervalSet getSetFromCharSetLiteral(GrammarAST charSetAST) {
    String raw = charSetAST.getText();
    String inner = raw.substring(1, raw.length() - 1);
    // Wrap in double quotes so the body can go through the string-literal unescaper.
    String quoted = '"' + inner + '"';
    IntervalSet result = new IntervalSet();

    // unescape all valid escape char like \n, leaving escaped dashes as '\-'
    // so we can avoid seeing them as '-' range ops.
    String body = CharSupport.getStringFromGrammarStringLiteral(quoted);

    // now make x-y become set of char
    int length = body.length();
    int pos = 0;
    while (pos < length) {
      int current = body.charAt(pos);
      boolean dashFollows = (pos + 1) < length && body.charAt(pos + 1) == '-';

      if (current == '\\' && dashFollows) {
        // \- : an escaped dash stands for itself; consume both characters.
        result.add('-');
        pos += 2;
        continue;
      }

      if (dashFollows && (pos + 2) < length) {
        // x-y : a range; consume all three characters even when it is empty (x > y).
        int low = current;
        int high = body.charAt(pos + 2);
        if (low <= high) {
          result.add(low, high);
        }
        pos += 3;
        continue;
      }

      result.add(current);
      pos++;
    }
    return result;
  }
Ejemplo n.º 4
0
  /**
   * Builds the ATN fragment for a lexer command that takes an argument.
   *
   * <p>If {@code createLexerAction} produces an action for the command, that action is used
   * directly. Otherwise the command is rendered through the code-generation template named
   * {@code Lexer<Command>Command}. A missing template is reported as
   * {@code INVALID_LEXER_COMMAND}; a template that declares no {@code arg} formal argument is
   * reported as {@code UNWANTED_LEXER_COMMAND_ARGUMENT}. In both error cases an epsilon fragment
   * is returned.
   *
   * @param ID AST node naming the lexer command
   * @param arg AST node holding the command's argument
   * @return the handle for the resulting action, or an epsilon handle after an error
   */
  @Override
  public Handle lexerCallCommand(GrammarAST ID, GrammarAST arg) {
    LexerAction directAction = createLexerAction(ID, arg);
    if (directAction != null) {
      return action(ID, directAction);
    }

    // fall back to standard action generation for the command
    String templateName = "Lexer" + CharSupport.capitalize(ID.getText()) + "Command";
    ST template = codegenTemplates.getInstanceOf(templateName);

    // Decide which problem, if any, prevents rendering the template with an argument.
    ErrorType problem = null;
    if (template == null) {
      problem = ErrorType.INVALID_LEXER_COMMAND;
    } else {
      boolean acceptsArg =
          template.impl.formalArguments != null
              && template.impl.formalArguments.containsKey("arg");
      if (!acceptsArg) {
        problem = ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT;
      }
    }

    if (problem != null) {
      g.tool.errMgr.grammarError(problem, g.fileName, ID.token, ID.getText());
      return epsilon(ID);
    }

    template.add("arg", arg.getText());
    template.add("grammar", arg.g);
    return action(template.render());
  }
Ejemplo n.º 5
0
 /**
  * Resolves the token type denoted by an atom.
  *
  * <p>In a lexer grammar the atom's text is decoded as a char literal and the character value
  * is returned; otherwise the text is looked up through the grammar's {@code getTokenType}.
  *
  * @param atom AST node whose text identifies the token or character
  * @return the character value (lexer grammar) or the grammar's token type for the text
  */
 protected int getTokenType(@NotNull GrammarAST atom) {
   if (!g.isLexer()) {
     return g.getTokenType(atom.getText());
   }
   return CharSupport.getCharValueFromGrammarCharLiteral(atom.getText());
 }
Ejemplo n.º 6
0
  /**
   * Builds the two-state ATN fragment for a lexer set (optionally inverted).
   *
   * <p>Each alternative contributes to one {@link IntervalSet}: a {@code RANGE} adds the span
   * between its two child literals, a {@code LEXER_CHAR_SET} adds every character of the
   * bracketed set, and a {@code STRING_LITERAL} adds its single character or reports
   * {@code INVALID_LITERAL_IN_LEXER_SET} when the literal does not decode to one character
   * (decoder result {@code -1}). A {@code TOKEN_REF} is reported as
   * {@code UNSUPPORTED_REFERENCE_IN_LEXER_SET}. Other node types are ignored.
   *
   * <p>An inverted set becomes a {@link NotSetTransition}. A non-inverted set becomes a
   * {@link RangeTransition} when it consists of exactly one interval and a
   * {@link SetTransition} otherwise.
   *
   * @param associatedAST AST node both new states are created for and that is tied to the start
   *     state
   * @param alts the alternatives making up the set
   * @param invert whether the set is negated
   * @return a handle spanning the start and stop states
   */
  @Override
  public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert) {
    ATNState start = newState(associatedAST);
    ATNState stop = newState(associatedAST);
    IntervalSet chars = new IntervalSet();

    for (GrammarAST alt : alts) {
      switch (alt.getType()) {
        case ANTLRParser.RANGE:
          {
            int from = CharSupport.getCharValueFromGrammarCharLiteral(alt.getChild(0).getText());
            int to = CharSupport.getCharValueFromGrammarCharLiteral(alt.getChild(1).getText());
            chars.add(from, to);
            break;
          }
        case ANTLRParser.LEXER_CHAR_SET:
          chars.addAll(getSetFromCharSetLiteral(alt));
          break;
        case ANTLRParser.STRING_LITERAL:
          {
            int single = CharSupport.getCharValueFromGrammarCharLiteral(alt.getText());
            if (single == -1) {
              // The literal is not a single character, so it cannot be a set member.
              g.tool.errMgr.grammarError(
                  ErrorType.INVALID_LITERAL_IN_LEXER_SET,
                  g.fileName,
                  alt.getToken(),
                  alt.getText());
            } else {
              chars.add(single);
            }
            break;
          }
        case ANTLRParser.TOKEN_REF:
          g.tool.errMgr.grammarError(
              ErrorType.UNSUPPORTED_REFERENCE_IN_LEXER_SET,
              g.fileName,
              alt.getToken(),
              alt.getText());
          break;
        default:
          // Any other node type contributes nothing to the set.
          break;
      }
    }

    Transition edge;
    if (invert) {
      edge = new NotSetTransition(stop, chars);
    } else {
      List<Interval> intervals = chars.getIntervals();
      if (intervals.size() == 1) {
        // A single contiguous interval is expressed as a plain range transition.
        Interval only = intervals.get(0);
        edge = new RangeTransition(stop, only.a, only.b);
      } else {
        edge = new SetTransition(stop, chars);
      }
    }
    start.addTransition(edge);

    associatedAST.atnState = start;
    return new Handle(start, stop);
  }
Ejemplo n.º 7
0
 /**
  * For a lexer, a string is a sequence of char to match. That is, "fog" is treated as 'f' 'o' 'g'
  * not as a single transition. Machine== o-'f'-&gt;o-'o'-&gt;o-'g'-&gt;o and has n+1 states for n
  * characters.
  *
  * <p>The literal's text is first unescaped with
  * {@code CharSupport.getStringFromGrammarStringLiteral}; one {@link AtomTransition} is then
  * chained per resulting character. When the unescaped literal is empty no transition is added
  * and the returned handle starts and ends on the same state, so callers never receive a
  * {@code null} right-hand state.
  *
  * @param stringLiteralAST AST node holding the quoted string literal; it is associated with
  *     the first state of the chain
  * @return a handle from the first state of the chain to the last one
  */
 @Override
 public Handle stringLiteral(TerminalAST stringLiteralAST) {
   String chars = stringLiteralAST.getText();
   chars = CharSupport.getStringFromGrammarStringLiteral(chars);
   int n = chars.length();
   ATNState left = newState(stringLiteralAST);
   // Start with right == left: for an empty literal the loop body never runs and the handle
   // must still have a usable (non-null) end state.
   ATNState right = left;
   for (int i = 0; i < n; i++) {
     ATNState next = newState(stringLiteralAST);
     right.addTransition(new AtomTransition(next, chars.charAt(i)));
     right = next;
   }
   stringLiteralAST.atnState = left;
   return new Handle(left, right);
 }
Ejemplo n.º 8
0
 /**
  * Given a token type, get a meaningful name for it such as the ID or string literal. If this is a
  * lexer and the ttype is in the char vocabulary, compute an ANTLR-valid (possibly escaped) char
  * literal.
  *
  * <p>For other types the name registered in the token-name table is used; when that name is
  * an auto-generated one and a string literal is registered for the same type, the literal is
  * preferred. Types with no slot in the token-name table (negative, or past its end) are
  * rendered as their decimal value.
  *
  * @param ttype the token type to describe
  * @return the display name; may be {@code null} when the token-name table holds a
  *     {@code null} entry for {@code ttype}
  */
 public String getTokenDisplayName(int ttype) {
   // inside any target's char range and is lexer grammar?
   if (isLexer() && ttype >= Lexer.MIN_CHAR_VALUE && ttype <= Lexer.MAX_CHAR_VALUE) {
     return CharSupport.getANTLRCharLiteralForChar(ttype);
   }
   if (ttype == Token.EOF) {
     return "EOF";
   }

   // A negative type cannot index the table (List.get would throw), and a type past the end
   // has no entry either; show the raw number in both cases.
   if (ttype < 0 || ttype >= typeToTokenList.size()) {
     return String.valueOf(ttype);
   }

   String tokenName = typeToTokenList.get(ttype);
   // Auto-generated names are placeholders for literals: show the literal when one is known.
   if (tokenName != null
       && tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX)
       && ttype < typeToStringLiteralList.size()
       && typeToStringLiteralList.get(ttype) != null) {
     tokenName = typeToStringLiteralList.get(ttype);
   }
   return tokenName;
 }