Exemple #1
0
  /**
   * Consumes the run of letters that follows a regexp literal and folds them into one int.
   *
   * <p>{@code i}, {@code x}, {@code m} and {@code o} OR the matching {@code ReOptions} bit into
   * the result; {@code j} ORs in 256. {@code n}, {@code e}, {@code s} and {@code u} each
   * overwrite a separate "kcode" value (16, 32, 48, 64), so the last of them wins. The first
   * non-letter (or EOF) that stops the scan is pushed back onto {@code src}.
   *
   * @param src source positioned just after the closing regexp delimiter
   * @return the option bits ORed with the kcode value
   * @throws SyntaxException if any letter other than the ones listed above was seen; all such
   *     letters are reported together
   */
  private int parseRegexpFlags(final LexerSource src) throws java.io.IOException {
    int optionBits = 0;
    int kcodeBits = 0;
    final StringBuilder rejected = new StringBuilder(10);

    int ch = src.read();
    while (ch != RubyYaccLexer.EOF && Character.isLetter(ch)) {
      if (ch == 'i') {
        optionBits |= ReOptions.RE_OPTION_IGNORECASE;
      } else if (ch == 'x') {
        optionBits |= ReOptions.RE_OPTION_EXTENDED;
      } else if (ch == 'm') {
        optionBits |= ReOptions.RE_OPTION_MULTILINE;
      } else if (ch == 'o') {
        optionBits |= ReOptions.RE_OPTION_ONCE;
      } else if (ch == 'n') {
        kcodeBits = 16;
      } else if (ch == 'e') {
        kcodeBits = 32;
      } else if (ch == 's') {
        kcodeBits = 48;
      } else if (ch == 'u') {
        kcodeBits = 64;
      } else if (ch == 'j') {
        optionBits |= 256; // Regexp engine 'java'
      } else {
        // Remember every unrecognised letter so they can be reported in one message.
        rejected.append((char) ch);
      }
      ch = src.read();
    }
    // Whatever ended the scan does not belong to the flags; hand it back.
    src.unread(ch);

    if (rejected.length() == 0) {
      return optionBits | kcodeBits;
    }

    final String badFlags = rejected.toString();
    final String plural = badFlags.length() > 1 ? "s" : "";
    throw new SyntaxException(
        PID.REGEXP_UNKNOWN_OPTION,
        src.getPosition(),
        "unknown regexp option" + plural + " - " + badFlags,
        badFlags);
  }
 // Steps through ".\n=end" and checks both begin-of-line flags after each stage.
 public void testWasBeginOfLineTellsIfPreviousCharactersWereNewline() throws IOException {
   final LexerSource source = newSource(".\n=end");

   // Nothing has been consumed yet.
   assertTrue(source.lastWasBeginOfLine());
   assertFalse(source.wasBeginOfLine());

   // Consume "." and the newline.
   for (int consumed = 0; consumed < 2; consumed++) {
     source.read();
   }
   assertTrue(source.lastWasBeginOfLine());
   assertFalse(source.wasBeginOfLine());

   // Consume "=", the first character of the second line.
   source.read();
   assertFalse(source.lastWasBeginOfLine());
   assertTrue(source.wasBeginOfLine());

   // Consume "e"; neither flag is expected to be set any more.
   source.read();
   assertFalse(source.lastWasBeginOfLine());
   assertFalse(source.wasBeginOfLine());
 }
 // After ten reads the current line is expected to be rendered as the second input line
 // followed by a caret marker line.
 public void testCurrentLineGivesAnErrorLocation() throws IOException {
   LexerSource source = newSource("111111\n222222\n333333");
   int remainingReads = 10;
   while (remainingReads > 0) {
     source.read();
     remainingReads--;
   }
   assertEquals("222222\n  ^", source.getCurrentLine());
 }
 // Drains a four-line input and checks the reported line count and offset.
 @SuppressWarnings("empty-statement")
 public void testLinesAndOffsetsAreReported() throws IOException {
   LexerSource source = newSource("a\nb\nc\nd\n");
   int ch;
   do {
     ch = source.read();
   } while (ch != RubyLexer.EOF);
   assertEquals(4, source.getLine());
   assertEquals(8, source.getOffset());
 }
 // Drains an input with "\r\n" line endings and checks that the captured lines keep them.
 public void testCaptureLinesWithCarriageReturn() throws IOException {
   List<String> captured = new ArrayList<String>();
   LexerSource source = newSource("1\r\n2\r\n3", captured);
   int ch;
   do {
     ch = source.read();
   } while (ch != RubyLexer.EOF);
   assertEquals(3, captured.size());
   assertEquals("1\r\n", captured.get(0));
   assertEquals("2\r\n", captured.get(1));
   assertEquals("3", captured.get(2));
 }
Exemple #6
0
  /**
   * Copies characters from {@code src} into {@code buffer} until the {@code end} terminator or
   * EOF is reached.
   *
   * <p>The terminator is pushed back onto {@code src} and is not copied. After a backslash the
   * next character is always copied; the backslash itself is dropped only when that character
   * is another backslash, or is the terminator and not a newline.
   *
   * @param src source to read from
   * @param buffer receives the string content
   * @return the terminator character if one was found, otherwise {@code RubyYaccLexer.EOF}
   */
  public int parseSimpleStringIntoBuffer(LexerSource src, ByteList buffer)
      throws java.io.IOException {
    int ch = src.read();

    while (ch != RubyYaccLexer.EOF) {
      if (ch == end) {
        src.unread(ch);
        return ch;
      }

      if (ch == '\\') {
        ch = src.read();
        final boolean escapedTerminator = ch != '\n' && ch == end;
        if (!escapedTerminator && ch != '\\') {
          buffer.append('\\');
        }
      }

      buffer.append(ch);
      ch = src.read();
    }

    return ch;
  }
 // Drains a three-line input and checks that each line was captured with its terminator.
 @SuppressWarnings("empty-statement")
 public void testCaptureLines() throws IOException {
   List<String> captured = new ArrayList<String>();
   LexerSource source = newSource("111111\n222222\n333333", captured);
   int ch;
   do {
     ch = source.read();
   } while (ch != RubyLexer.EOF);
   assertEquals(3, captured.size());
   assertEquals("111111\n", captured.get(0));
   assertEquals("222222\n", captured.get(1));
   assertEquals("333333", captured.get(2));
 }
 // This one currently fails with InputStreamLexerSource
 //
 // Reading "abc\r\ndef" is expected to yield "abc\ndef" while the offset still counts the
 // carriage return; unreading across the line break must restore the offset accordingly.
 public void testCarriageReturnsShouldBeUnreadAutomatically() throws IOException {
   LexerSource source = newSource("abc\r\ndef");

   assertReadShouldProduce(source, "abc\ndef");
   assertEquals(8, source.getOffset());

   // Push back everything from the line break onwards.
   source.unreadMany("\ndef");
   assertEquals(3, source.getOffset());

   // A single read is expected to advance the offset by two (3 -> 5).
   source.read();
   assertEquals(5, source.getOffset());

   assertReadShouldProduce(source, "def");
 }
Exemple #9
0
  // Was a goto in original ruby lexer
  /**
   * Handles the character that follows a {@code \M-}, {@code \C-} or {@code \c} prefix.
   *
   * <p>A backslash starts a nested escape, which is delegated to
   * {@link #parseEscapeIntoBuffer}; any other character is copied to {@code buffer} unchanged.
   *
   * @throws SyntaxException if the input ends right after the prefix
   */
  private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException {
    final int next = src.read();

    if (next == '\\') {
      parseEscapeIntoBuffer(src, buffer);
      return;
    }

    if (next == RubyYaccLexer.EOF) {
      throw new SyntaxException(
          PID.INVALID_ESCAPE_SYNTAX,
          src.getPosition(),
          src.getCurrentLine(),
          "Invalid escape character syntax");
    }

    buffer.append(next);
  }
 // After ten reads, the stream of remaining input is expected to start in the middle of the
 // second line.
 public void testGetRemainingOutputAsStream() throws IOException {
   LexerSource source = newSource("111111\n222222\n333333\n");
   int remainingReads = 10;
   while (remainingReads > 0) {
     source.read();
     remainingReads--;
   }

   final InputStream rest = source.getRemainingAsStream();
   // Drains the stream byte by byte into a ByteList.
   final Callable<byte[]> drainRest =
       new Callable<byte[]>() {
         public byte[] call() throws Exception {
           ByteList collected = new ByteList();
           for (int b = rest.read(); b != -1; b = rest.read()) {
             collected.append(b);
           }
           return collected.bytes();
         }
       };
   assertActionShouldProduce("222\n333333\n", drainRest);
 }
 // Reads up to expected.length() characters from src (stopping early at EOF) and asserts that
 // the bytes read match expected. An IOException while reading fails the test.
 private void assertReadShouldProduce(LexerSource src, String expected) {
   final int limit = expected.length();
   final ByteList collected = new ByteList(limit);

   try {
     int taken = 0;
     while (taken < limit) {
       final int ch = src.read();
       if (ch == RubyLexer.EOF) {
         break;
       }
       collected.append(ch);
       taken++;
     }
   } catch (IOException ex) {
     // fail() aborts the test, so no further reads are attempted.
     fail(ex.getMessage());
   }

   final Callable<byte[]> collectedBytes =
       new Callable<byte[]>() {
         public byte[] call() throws Exception {
           return collected.bytes();
         }
       };
   assertActionShouldProduce(expected, collectedBytes);
 }
 // The very first character of the input is expected to count as being at the beginning of a
 // line.
 public void testWasBeginOfLineBehaviorAtBeginningOfInput() throws IOException {
   final LexerSource source = newSource("=begin");

   // Consume "=".
   source.read();

   assertFalse(source.lastWasBeginOfLine());
   assertTrue(source.wasBeginOfLine());
 }
Exemple #13
0
  /**
   * Produces the next token for the string-like literal described by this term.
   *
   * <p>Depending on the {@code flags}, {@code begin}, {@code end} and {@code nest} fields and on
   * what is read next from {@code src}, this returns one of:
   *
   * <ul>
   *   <li>{@code Tokens.tSTRING_END} when the terminator is reached, or when {@code flags} is
   *       the -1 sentinel;
   *   <li>{@code Tokens.tREGEXP_END} when the terminator of a regexp is reached (the trailing
   *       option letters are parsed as well);
   *   <li>{@code ' '} as a word separator for word-list literals;
   *   <li>{@code Tokens.tSTRING_DVAR} / {@code Tokens.tSTRING_DBEG} when an interpolation
   *       starts ({@code #$}, {@code #@} or <code>#{</code>);
   *   <li>{@code Tokens.tSTRING_CONTENT} with a {@code StrNode} holding the text read.
   * </ul>
   *
   * @param lexer lexer that receives the token value via {@code setValue}
   * @param src source to read characters from
   * @return the token type, or {@code ' '} for a word separator
   * @throws SyntaxException if the input ends before the terminator is found
   */
  public int parseString(RubyYaccLexer lexer, LexerSource src) throws java.io.IOException {
    boolean spaceSeen = false;
    int c;

    // FIXME: How much more obtuse can this be?
    // Heredoc already parsed this and saved string...Do not parse..just return
    // (flags is also set to -1 below once a word list has hit its terminator.)
    if (flags == -1) {
      lexer.setValue(new Token("\"", lexer.getPosition()));
      return Tokens.tSTRING_END;
    }

    c = src.read();
    if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) {
      // Collapse a run of whitespace between words into a single separator.
      do {
        c = src.read();
      } while (Character.isWhitespace(c));
      spaceSeen = true;
    }

    if (c == end && nest == 0) {
      if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0) {
        // Emit one last separator now; the next call returns tSTRING_END via the sentinel.
        flags = -1;
        // Result is discarded; presumably called for a side effect on the lexer - confirm.
        lexer.getPosition();
        return ' ';
      }

      if ((flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0) {
        lexer.setValue(
            new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src)));
        return Tokens.tREGEXP_END;
      }

      lexer.setValue(new Token("\"", lexer.getPosition()));
      return Tokens.tSTRING_END;
    }

    if (spaceSeen) {
      // The character after the whitespace belongs to the next word.
      src.unread(c);
      // Result is discarded; presumably called for a side effect on the lexer - confirm.
      lexer.getPosition();
      return ' ';
    }

    // Single-quote fast path
    if (begin == '\0' && flags == 0) {
      ByteList buffer = new ByteList();
      src.unread(c);
      if (parseSimpleStringIntoBuffer(src, buffer) == RubyYaccLexer.EOF) {
        throw new SyntaxException(
            PID.STRING_HITS_EOF,
            src.getPosition(),
            src.getCurrentLine(),
            "unterminated string meets end of file");
      }

      lexer.setValue(new StrNode(lexer.getPosition(), buffer));
      return Tokens.tSTRING_CONTENT;
    }

    ByteList buffer = new ByteList();

    if ((flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') {
      c = src.read();
      switch (c) {
        case '$':
        case '@':
          src.unread(c);
          // c is an int: without the cast the token text would be "#36" or "#64".
          lexer.setValue(new Token("#" + (char) c, lexer.getPosition()));
          return Tokens.tSTRING_DVAR;
        case '{':
          // Same cast as above, otherwise the token text would be "#123".
          lexer.setValue(new Token("#" + (char) c, lexer.getPosition()));
          return Tokens.tSTRING_DBEG;
      }
      // A '#' that does not start an interpolation is ordinary content.
      buffer.append((byte) '#');
    }
    src.unread(c);

    if (parseStringIntoBuffer(lexer, src, buffer) == RubyYaccLexer.EOF) {
      throw new SyntaxException(
          PID.STRING_HITS_EOF,
          src.getPosition(),
          src.getCurrentLine(),
          "unterminated string meets end of file");
    }

    lexer.setValue(new StrNode(lexer.getPosition(), buffer));
    return Tokens.tSTRING_CONTENT;
  }
Exemple #14
0
  /**
   * Copies one backslash escape from {@code src} into {@code buffer} in source form, i.e.
   * without translating it into the character it denotes. The leading backslash has already
   * been consumed by the caller.
   *
   * <ul>
   *   <li>newline: nothing is written;
   *   <li>octal digit: backslash plus up to three octal digits;
   *   <li>{@code x}: backslash, {@code x}, and one or two hex digits;
   *   <li>{@code M-}, {@code C-}, {@code c}: the prefix is written, then {@link #escaped}
   *       handles the character that follows;
   *   <li>anything else: see the review note on the default branch.
   * </ul>
   *
   * @throws SyntaxException on EOF inside the escape, a missing hex digit after {@code \x}, or
   *     a missing {@code -} after {@code \M} / {@code \C}
   */
  private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    int ch = src.read();

    switch (ch) {
      case RubyYaccLexer.EOF:
        throw new SyntaxException(
            PID.INVALID_ESCAPE_SYNTAX,
            src.getPosition(),
            src.getCurrentLine(),
            "Invalid escape character syntax");

      case '\n':
        /* just ignore */
        return;

      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
        /* octal constant */
        buffer.append('\\');
        buffer.append(ch);
        // At most two more octal digits belong to this escape.
        for (int remaining = 2; remaining > 0; remaining--) {
          ch = src.read();
          if (ch == RubyYaccLexer.EOF) {
            throw new SyntaxException(
                PID.INVALID_ESCAPE_SYNTAX,
                src.getPosition(),
                src.getCurrentLine(),
                "Invalid escape character syntax");
          }
          if (!RubyYaccLexer.isOctChar(ch)) {
            src.unread(ch);
            return;
          }
          buffer.append(ch);
        }
        return;

      case 'x': {
        /* hex constant */
        buffer.append('\\');
        buffer.append(ch);

        // The first hex digit is mandatory.
        final int firstDigit = src.read();
        if (!RubyYaccLexer.isHexChar(firstDigit)) {
          throw new SyntaxException(
              PID.INVALID_ESCAPE_SYNTAX,
              src.getPosition(),
              src.getCurrentLine(),
              "Invalid escape character syntax");
        }
        buffer.append(firstDigit);

        // The second one is optional; anything else is handed back.
        final int secondDigit = src.read();
        if (!RubyYaccLexer.isHexChar(secondDigit)) {
          src.unread(secondDigit);
          return;
        }
        buffer.append(secondDigit);
        return;
      }

      case 'M':
      case 'C': {
        // Both prefixes have the same shape: letter, '-', then the affected character.
        final byte prefix = (byte) ch;
        if (src.read() != '-') {
          throw new SyntaxException(
              PID.INVALID_ESCAPE_SYNTAX,
              src.getPosition(),
              src.getCurrentLine(),
              "Invalid escape character syntax");
        }
        buffer.append(new byte[] {'\\', prefix, '-'});
        escaped(src, buffer);
        return;
      }

      case 'c':
        buffer.append(new byte[] {'\\', 'c'});
        escaped(src, buffer);
        return;

      default:
        // NOTE(review): this condition is false only when `end` is itself a backslash and ch
        // is a backslash, so in practice the backslash is always kept. Confirm whether "always
        // keep" is the intent before simplifying or changing the operator.
        if (ch != '\\' || ch != end) {
          buffer.append('\\');
        }
        buffer.append(ch);
    }
  }
Exemple #15
0
  /**
   * Copies string content from {@code src} into {@code buffer} until something that ends the
   * current content run is found.
   *
   * <p>The run ends, with the offending characters pushed back onto {@code src}, at:
   *
   * <ul>
   *   <li>the {@code end} terminator while {@code nest} is 0;
   *   <li>a {@code #} followed by {@code $}, {@code @} or <code>{</code> when the EXPAND flag
   *       is set;
   *   <li>whitespace when the QWORDS flag is set.
   * </ul>
   *
   * <p>The {@code nest} field is incremented for every {@code begin} character and decremented
   * for every {@code end} character seen while it is non-zero, so nested delimiter pairs are
   * copied as content.
   *
   * @param lexer used to decode escapes via {@code readEscape()} when the EXPAND flag is set
   * @param src source to read from
   * @param buffer receives the content
   * @return the last character read: {@code RubyYaccLexer.EOF} if the input ran out, otherwise
   *     the character that ended the run
   */
  public int parseStringIntoBuffer(RubyYaccLexer lexer, LexerSource src, ByteList buffer)
      throws java.io.IOException {
    boolean qwords = (flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0;
    boolean expand = (flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0;
    boolean escape = (flags & RubyYaccLexer.STR_FUNC_ESCAPE) != 0;
    boolean regexp = (flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0;
    int c;

    // Every path that neither breaks out of the loop nor continues falls through to the
    // buffer.append(c) at the bottom.
    while ((c = src.read()) != RubyYaccLexer.EOF) {
      if (begin != '\0' && c == begin) {
        // Opening delimiter of a nested pair; it is copied as content below.
        nest++;
      } else if (c == end) {
        if (nest == 0) {
          // The real terminator: leave it for the caller.
          src.unread(c);
          break;
        }
        nest--;
      } else if (c == '#' && expand && !src.peek('\n')) {
        int c2 = src.read();

        if (c2 == '$' || c2 == '@' || c2 == '{') {
          // Start of an interpolation: push back both characters and stop.
          src.unread(c2);
          src.unread(c);
          break;
        }
        // Plain '#': only the lookahead is pushed back, the '#' itself is appended below.
        src.unread(c2);
      } else if (c == '\\') {
        c = src.read();
        // Inside this switch, `break` leaves only the switch (c is still appended below),
        // while `continue` restarts the loop and skips the append.
        switch (c) {
          case '\n':
            if (qwords) break; // the newline is appended without a backslash
            if (expand) continue; // backslash and newline are both dropped
            buffer.append('\\');
            break;

          case '\\':
            // One backslash is always appended below; a second one only with ESCAPE.
            if (escape) buffer.append(c);
            break;

          default:
            if (regexp) {
              // The escape is copied by parseEscapeIntoBuffer, which re-reads the character.
              src.unread(c);
              parseEscapeIntoBuffer(src, buffer);
              continue;
            } else if (expand) {
              src.unread(c);
              if (escape) buffer.append('\\');
              // c becomes whatever readEscape() returns and is appended below.
              c = lexer.readEscape();
            } else if (qwords && Character.isWhitespace(c)) {
              /* ignore backslashed spaces in %w */
            } else if (c != end && !(begin != '\0' && c == begin)) {
              // Keep the backslash unless it escapes one of the delimiters.
              buffer.append('\\');
            }
        }
      } else if (qwords && Character.isWhitespace(c)) {
        // Unescaped whitespace ends the current word.
        src.unread(c);
        break;
      }
      buffer.append(c);
    }

    return c;
  }
 // Unreading a string that ends with the character just read ("a") and is preceded by
 // characters that were never in the input ("123") must make all of them readable again.
 public void testUnreadBothNewAndExistingCharacters() throws IOException {
   LexerSource source = newSource("abc");

   // Consume "a".
   source.read();

   source.unreadMany("123a");
   assertReadShouldProduce(source, "123abc");
 }
 // peek() must report the upcoming character without consuming it, so the following read()
 // still returns that same character.
 public void testPeekGivesTheNextCharaterWithoutAdvancing() throws IOException {
   LexerSource source = newSource("abc");

   assertTrue(source.peek('a'));
   assertEquals('a', source.read());
 }