Beispiel #1
0
  /**
   * Consumes the option letters that trail a regexp literal and folds them into one bit mask.
   *
   * <p>Letters are read from {@code src} until EOF or the first non-letter character; that
   * terminating value is pushed back with {@code unread}. Letters that are not recognised
   * options are collected and reported together in a single {@code SyntaxException}.
   *
   * @param src the source positioned directly after the closing regexp delimiter
   * @return the accumulated option bits OR-ed with the last kcode value selected (0 if none)
   * @throws java.io.IOException propagated from reading {@code src}
   */
  private int parseRegexpFlags(final LexerSource src) throws java.io.IOException {
    int optionBits = 0;
    // Only the most recent of n/e/s/u is kept; each one overwrites the previous value.
    int kcodeBits = 0;
    final StringBuilder rejected = new StringBuilder(10);

    int ch = src.read();
    while (ch != RubyYaccLexer.EOF && Character.isLetter(ch)) {
      if (ch == 'i') {
        optionBits |= ReOptions.RE_OPTION_IGNORECASE;
      } else if (ch == 'x') {
        optionBits |= ReOptions.RE_OPTION_EXTENDED;
      } else if (ch == 'm') {
        optionBits |= ReOptions.RE_OPTION_MULTILINE;
      } else if (ch == 'o') {
        optionBits |= ReOptions.RE_OPTION_ONCE;
      } else if (ch == 'n') {
        kcodeBits = 16;
      } else if (ch == 'e') {
        kcodeBits = 32;
      } else if (ch == 's') {
        kcodeBits = 48;
      } else if (ch == 'u') {
        kcodeBits = 64;
      } else if (ch == 'j') {
        optionBits |= 256; // Regexp engine 'java'
      } else {
        rejected.append((char) ch);
      }
      ch = src.read();
    }

    // Hand back whatever stopped the scan so the caller sees it again.
    src.unread(ch);

    if (rejected.length() == 0) {
      return optionBits | kcodeBits;
    }

    final String letters = rejected.toString();
    final String plural = letters.length() > 1 ? "s" : "";
    throw new SyntaxException(
        PID.REGEXP_UNKNOWN_OPTION,
        src.getPosition(),
        "unknown regexp option" + plural + " - " + letters,
        letters);
  }
Beispiel #2
0
  /**
   * Copies the body of a simple (non-interpolating) string from {@code src} into
   * {@code buffer}, stopping in front of the terminator.
   *
   * <p>Reading stops when the {@code end} character is met (it is pushed back and returned) or
   * when the source is exhausted. A backslash is kept in the buffer unless it precedes another
   * backslash or a non-newline terminator, in which case only the escaped character is stored.
   *
   * @param src the source to read from
   * @param buffer receives the string content
   * @return the {@code end} character when the terminator was found, or
   *     {@code RubyYaccLexer.EOF} when the input ended first
   * @throws java.io.IOException propagated from reading {@code src}
   */
  public int parseSimpleStringIntoBuffer(LexerSource src, ByteList buffer)
      throws java.io.IOException {
    int c;

    while ((c = src.read()) != RubyYaccLexer.EOF) {
      if (c == end) {
        src.unread(c);
        break;
      } else if (c == '\\') {
        c = src.read();
        if (c == RubyYaccLexer.EOF) {
          // Input ended right after the backslash: keep the backslash that was really read,
          // but do not store the EOF marker as if it were string data. EOF is returned below.
          buffer.append('\\');
          break;
        }
        if ((c == '\n' || c != end) && c != '\\') buffer.append('\\');
      }

      buffer.append(c);
    }

    return c;
  }
 /**
  * Checks that a character handed back through {@code unread} is delivered again before the
  * not-yet-consumed remainder of the input.
  */
 public void testReadUnreadReadShouldProduceCorrectSequence() {
   final LexerSource source = newSource("abcd1234");

   // Presumably consumes one character per expected character and compares them; the helper
   // is defined outside this snippet.
   assertReadShouldProduce(source, "abcd");

   // Push the last character back, then expect it again followed by the rest of the input.
   source.unread('d');
   assertReadShouldProduce(source, "d1234");
 }
Beispiel #4
0
  /**
   * Produces the next token for the string-like literal described by this term and stores the
   * token's value on {@code lexer}.
   *
   * <p>Depending on this term's {@code flags}, {@code begin}, {@code end} and {@code nest}
   * state, the result is one of:
   *
   * <ul>
   *   <li>{@code Tokens.tSTRING_END} when the terminator is reached (or the term is already
   *       marked finished with {@code flags == -1});
   *   <li>{@code Tokens.tREGEXP_END} when a regexp terminator is reached; trailing option
   *       letters are parsed via {@code parseRegexpFlags};
   *   <li>{@code ' '} as a word separator inside a word list;
   *   <li>{@code Tokens.tSTRING_DVAR} or {@code Tokens.tSTRING_DBEG} when the content starts
   *       with {@code #$}, {@code #@} or <code>#{</code> and expansion is enabled;
   *   <li>{@code Tokens.tSTRING_CONTENT} carrying the literal text read up to the next stop
   *       point.
   * </ul>
   *
   * @param lexer the lexer that receives the token value and supplies positions
   * @param src the character source to read from
   * @return the token type, or {@code ' '} for a word-list separator
   * @throws java.io.IOException propagated from reading {@code src}
   */
  public int parseString(RubyYaccLexer lexer, LexerSource src) throws java.io.IOException {
    // flags == -1 is a sentinel: the content has already been delivered and only the end token
    // is left. It is set below when a word list closes; per the original note, heredoc handling
    // also relies on it after it has parsed and saved its string.
    if (flags == -1) {
      lexer.setValue(new Token("\"", lexer.getPosition()));
      return Tokens.tSTRING_END;
    }

    final boolean qwords = (flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0;
    boolean spaceSeen = false;

    int c = src.read();
    if (qwords && Character.isWhitespace(c)) {
      // Collapse a run of whitespace between words into a single separator.
      do {
        c = src.read();
      } while (Character.isWhitespace(c));
      spaceSeen = true;
    }

    if (c == end && nest == 0) {
      if (qwords) {
        // Emit one last separator now; the next call returns tSTRING_END via the sentinel.
        flags = -1;
        // Result unused; the call is kept in case getPosition() has side effects on the lexer
        // (not visible from here).
        lexer.getPosition();
        return ' ';
      }

      if ((flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
        lexer.setValue(
            new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
        return Tokens.tREGEXP_END;
      }

      lexer.setValue(new Token("\"", lexer.getPosition()));
      return Tokens.tSTRING_END;
    }

    if (spaceSeen) {
      // Whitespace was skipped and a word follows: give the word's first character back and
      // report the separator.
      src.unread(c);
      // Result unused; see the note on the identical call above.
      lexer.getPosition();
      return ' ';
    }

    // Single-quote fast path
    if (begin == '\0' && flags == 0) {
      ByteList buffer = new ByteList();
      src.unread(c);
      if (parseSimpleStringIntoBuffer(src, buffer) == RubyYaccLexer.EOF) {
        throw new SyntaxException(
            PID.STRING_HITS_EOF,
            src.getPosition(),
            src.getCurrentLine(),
            "unterminated string meets end of file");
      }

      lexer.setValue(new StrNode(lexer.getPosition(), buffer));
      return Tokens.tSTRING_CONTENT;
    }

    ByteList buffer = new ByteList();

    if ((flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
      c = src.read();
      switch (c) {
        case '$':
        case '@':
          src.unread(c);
          // Cast to char: concatenating the int would yield its decimal code ("#36"), not "#$".
          lexer.setValue(new Token("#" + (char) c, lexer.getPosition()));
          return Tokens.tSTRING_DVAR;
        case '{':
          lexer.setValue(new Token("#" + (char) c, lexer.getPosition()));
          return Tokens.tSTRING_DBEG;
      }
      // Not an interpolation start: the '#' is ordinary content.
      buffer.append((byte) '#');
    }
    src.unread(c);

    if (parseStringIntoBuffer(lexer, src, buffer) == RubyYaccLexer.EOF) {
      throw new SyntaxException(
          PID.STRING_HITS_EOF,
          src.getPosition(),
          src.getCurrentLine(),
          "unterminated string meets end of file");
    }

    lexer.setValue(new StrNode(lexer.getPosition(), buffer));
    return Tokens.tSTRING_CONTENT;
  }
Beispiel #5
0
  /**
   * Reads the character(s) that follow a backslash and appends the escape sequence to
   * {@code buffer} in its escaped, textual form (the backslash is written out again rather
   * than the sequence being evaluated).
   *
   * <p>Handled forms: backslash-newline (dropped), octal (one to three digits), {@code \x}
   * with one or two hex digits, {@code \M-}, {@code \C-} and {@code \c} (the remainder is
   * delegated to {@code escaped}), and any other single character.
   *
   * @param src the source positioned directly after the backslash
   * @param buffer receives the escape sequence text
   * @throws java.io.IOException propagated from reading {@code src}
   */
  private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    final int first = src.read();

    switch (first) {
      case '\n':
        // Backslash-newline contributes nothing to the buffer.
        return;

      case RubyYaccLexer.EOF:
        throw invalidEscapeSyntax(src);

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        // Octal constant: the digit just read plus at most two more.
        buffer.append('\\');
        buffer.append(first);
        for (int remaining = 2; remaining > 0; remaining--) {
          final int digit = src.read();
          if (digit == RubyYaccLexer.EOF) {
            throw invalidEscapeSyntax(src);
          }
          if (!RubyYaccLexer.isOctChar(digit)) {
            src.unread(digit);
            break;
          }
          buffer.append(digit);
        }
        return;

      case 'x': {
        // Hex constant: one mandatory digit, one optional digit.
        buffer.append('\\');
        buffer.append(first);
        final int high = src.read();
        if (!RubyYaccLexer.isHexChar(high)) {
          throw invalidEscapeSyntax(src);
        }
        buffer.append(high);
        final int low = src.read();
        if (RubyYaccLexer.isHexChar(low)) {
          buffer.append(low);
        } else {
          src.unread(low);
        }
        return;
      }

      case 'M':
      case 'C':
        // Both forms require a '-' right after the letter.
        if (src.read() != '-') {
          throw invalidEscapeSyntax(src);
        }
        buffer.append(new byte[] {'\\', (byte) first, '-'});
        escaped(src, buffer);
        return;

      case 'c':
        buffer.append(new byte[] {'\\', 'c'});
        escaped(src, buffer);
        return;

      default:
        // The backslash is omitted only when the character is a backslash AND equals `end`;
        // in every other case it is written out in front of the character.
        if (first != '\\' || first != end) {
          buffer.append('\\');
        }
        buffer.append(first);
    }
  }

  /** Builds the exception used for every malformed escape, located at the source's position. */
  private SyntaxException invalidEscapeSyntax(LexerSource src) throws java.io.IOException {
    return new SyntaxException(
        PID.INVALID_ESCAPE_SYNTAX,
        src.getPosition(),
        src.getCurrentLine(),
        "Invalid escape character syntax");
  }
Beispiel #6
0
  /**
   * Copies string content from {@code src} into {@code buffer} until a stop point is reached.
   *
   * <p>Reading stops, with the stopping character(s) pushed back onto {@code src}, at:
   *
   * <ul>
   *   <li>the {@code end} character while {@code nest} is 0;
   *   <li>a {@code #} followed by {@code $}, {@code @} or <code>{</code> when the EXPAND flag
   *       is set;
   *   <li>a whitespace character when the QWORDS flag is set.
   * </ul>
   *
   * <p>Reading also stops when the source is exhausted. Nested {@code begin}/{@code end}
   * pairs adjust {@code nest} and are copied into the buffer like ordinary characters.
   *
   * @param lexer used to evaluate escapes via {@code readEscape()} when the EXPAND flag is set
   * @param src the source to read from
   * @param buffer receives the content
   * @return the last character read: the one that stopped the scan, or
   *     {@code RubyYaccLexer.EOF} if the input ended first
   * @throws java.io.IOException propagated from reading {@code src}
   */
  public int parseStringIntoBuffer(RubyYaccLexer lexer, LexerSource src, ByteList buffer)
      throws java.io.IOException {
    boolean qwords = (flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0;
    boolean expand = (flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0;
    boolean escape = (flags & RubyYaccLexer.STR_FUNC_ESCAPE) != 0;
    boolean regexp = (flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0;
    int c;

    while ((c = src.read()) != RubyYaccLexer.EOF) {
      if (begin != '\0' && c == begin) {
        // Opening delimiter inside the literal: one level deeper; it is appended below.
        nest++;
      } else if (c == end) {
        if (nest == 0) {
          // Real terminator: leave it for the caller.
          src.unread(c);
          break;
        }
        // Closes a nested pair; it is appended below as content.
        nest--;
      } else if (c == '#' && expand && !src.peek('\n')) {
        int c2 = src.read();

        if (c2 == '$' || c2 == '@' || c2 == '{') {
          // Interpolation start: push back both characters (in reverse order) and stop.
          src.unread(c2);
          src.unread(c);
          break;
        }
        // Plain '#': give back the look-ahead; the '#' itself is appended below.
        src.unread(c2);
      } else if (c == '\\') {
        c = src.read();
        // NOTE: inside this switch, `break` falls through to buffer.append(c) at the bottom of
        // the loop, whereas `continue` skips that append.
        switch (c) {
          case '\n':
            // QWORDS: only the newline is appended.
            if (qwords) break;
            // EXPAND: backslash-newline is dropped entirely.
            if (expand) continue;
            // Otherwise both the backslash and the newline are kept.
            buffer.append('\\');
            break;

          case '\\':
            // With ESCAPE both backslashes are kept, otherwise just one.
            if (escape) buffer.append(c);
            break;

          default:
            if (regexp) {
              // Hand the character back and let parseEscapeIntoBuffer write the whole escape.
              src.unread(c);
              parseEscapeIntoBuffer(src, buffer);
              continue;
            } else if (expand) {
              // Hand the character back and let the lexer evaluate the escape; the result is
              // appended below (preceded by a backslash when ESCAPE is set).
              // NOTE(review): assumes lexer.readEscape() reads from this same src - confirm.
              src.unread(c);
              if (escape) buffer.append('\\');
              c = lexer.readEscape();
            } else if (qwords && Character.isWhitespace(c)) {
              /* ignore backslashed spaces in %w */
            } else if (c != end && !(begin != '\0' && c == begin)) {
              // Not an escaped delimiter: keep the backslash in front of the character.
              buffer.append('\\');
            }
        }
      } else if (qwords && Character.isWhitespace(c)) {
        // Unescaped whitespace ends the current word; leave it for the caller.
        src.unread(c);
        break;
      }
      buffer.append(c);
    }

    return c;
  }