private int parseRegexpFlags(final LexerSource src) throws java.io.IOException { char kcode = 0; int options = 0; int c; StringBuilder unknownFlags = new StringBuilder(10); for (c = src.read(); c != RubyYaccLexer.EOF && Character.isLetter(c); c = src.read()) { switch (c) { case 'i': options |= ReOptions.RE_OPTION_IGNORECASE; break; case 'x': options |= ReOptions.RE_OPTION_EXTENDED; break; case 'm': options |= ReOptions.RE_OPTION_MULTILINE; break; case 'o': options |= ReOptions.RE_OPTION_ONCE; break; case 'n': kcode = 16; break; case 'e': kcode = 32; break; case 's': kcode = 48; break; case 'u': kcode = 64; break; case 'j': options |= 256; // Regexp engine 'java' break; default: unknownFlags.append((char) c); break; } } src.unread(c); if (unknownFlags.length() != 0) { throw new SyntaxException( PID.REGEXP_UNKNOWN_OPTION, src.getPosition(), "unknown regexp option" + (unknownFlags.length() > 1 ? "s" : "") + " - " + unknownFlags.toString(), unknownFlags.toString()); } return options | kcode; }
/**
 * Tracks {@code lastWasBeginOfLine()} and {@code wasBeginOfLine()} while reading
 * through the input {@code ".\n=end"} one character at a time.
 */
public void testWasBeginOfLineTellsIfPreviousCharactersWereNewline() throws IOException {
    final LexerSource source = newSource(".\n=end");

    // Nothing read yet.
    assertTrue(source.lastWasBeginOfLine());
    assertFalse(source.wasBeginOfLine());

    // After two reads.
    source.read();
    source.read();
    assertTrue(source.lastWasBeginOfLine());
    assertFalse(source.wasBeginOfLine());

    // After the third read.
    source.read();
    assertFalse(source.lastWasBeginOfLine());
    assertTrue(source.wasBeginOfLine());

    // After the fourth read.
    source.read();
    assertFalse(source.lastWasBeginOfLine());
    assertFalse(source.wasBeginOfLine());
}
/** Reads ten characters of a three-line input and checks what {@code getCurrentLine()} reports. */
public void testCurrentLineGivesAnErrorLocation() throws IOException {
    final LexerSource source = newSource("111111\n222222\n333333");

    int remaining = 10;
    while (remaining > 0) {
        source.read();
        remaining--;
    }

    assertEquals("222222\n ^", source.getCurrentLine());
}
/** Drains a four-line input and checks the line and offset reported afterwards. */
@SuppressWarnings("empty-statement")
public void testLinesAndOffsetsAreReported() throws IOException {
    final LexerSource source = newSource("a\nb\nc\nd\n");

    int ch;
    do {
        ch = source.read();
    } while (ch != RubyLexer.EOF);

    assertEquals(4, source.getLine());
    assertEquals(8, source.getOffset());
}
/** Drains a CRLF-separated input and checks the lines handed to the capture list. */
public void testCaptureLinesWithCarriageReturn() throws IOException {
    final List<String> captured = new ArrayList<String>();
    final LexerSource source = newSource("1\r\n2\r\n3", captured);

    int ch;
    do {
        ch = source.read();
    } while (ch != RubyLexer.EOF);

    assertEquals(3, captured.size());
    assertEquals("1\r\n", captured.get(0));
    assertEquals("2\r\n", captured.get(1));
    assertEquals("3", captured.get(2));
}
/**
 * Copies characters from {@code src} into {@code buffer} until the terminator
 * {@code end} or EOF is reached.
 *
 * <p>The terminator is pushed back rather than consumed. After a backslash, the
 * backslash itself is kept unless the following character is another backslash, or is
 * the terminator and not a newline.
 *
 * @param src source to read from
 * @param buffer receives the string contents
 * @return the last character read: the terminator, or {@code RubyYaccLexer.EOF}
 * @throws java.io.IOException if reading from {@code src} fails
 */
public int parseSimpleStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    int c = src.read();

    while (c != RubyYaccLexer.EOF) {
        if (c == end) {
            src.unread(c);
            return c;
        }

        if (c == '\\') {
            c = src.read();
            final boolean keepBackslash = c != '\\' && (c == '\n' || c != end);
            if (keepBackslash) {
                buffer.append('\\');
            }
        }

        buffer.append(c);
        c = src.read();
    }

    return c;
}
/** Drains a three-line input and checks the lines handed to the capture list. */
@SuppressWarnings("empty-statement")
public void testCaptureLines() throws IOException {
    final List<String> captured = new ArrayList<String>();
    final LexerSource source = newSource("111111\n222222\n333333", captured);

    int ch;
    do {
        ch = source.read();
    } while (ch != RubyLexer.EOF);

    assertEquals(3, captured.size());
    assertEquals("111111\n", captured.get(0));
    assertEquals("222222\n", captured.get(1));
    assertEquals("333333", captured.get(2));
}
// This one currently fails with InputStreamLexerSource public void testCarriageReturnsShouldBeUnreadAutomatically() throws IOException { LexerSource src = newSource("abc\r\ndef"); assertReadShouldProduce(src, "abc\ndef"); assertEquals(8, src.getOffset()); src.unreadMany("\ndef"); assertEquals(3, src.getOffset()); src.read(); assertEquals(5, src.getOffset()); assertReadShouldProduce(src, "def"); }
// Was a goto in original ruby lexer private void escaped(LexerSource src, ByteList buffer) throws java.io.IOException { int c; switch (c = src.read()) { case '\\': parseEscapeIntoBuffer(src, buffer); break; case RubyYaccLexer.EOF: throw new SyntaxException( PID.INVALID_ESCAPE_SYNTAX, src.getPosition(), src.getCurrentLine(), "Invalid escape character syntax"); default: buffer.append(c); } }
/** Reads ten characters, then checks the bytes served by {@code getRemainingAsStream()}. */
public void testGetRemainingOutputAsStream() throws IOException {
    final LexerSource source = newSource("111111\n222222\n333333\n");

    int remaining = 10;
    while (remaining > 0) {
        source.read();
        remaining--;
    }

    final InputStream rest = source.getRemainingAsStream();

    // Drains the stream into a byte array when invoked.
    final Callable<byte[]> drainRest =
            new Callable<byte[]>() {
                public byte[] call() throws Exception {
                    final ByteList bytes = new ByteList();
                    for (int b = rest.read(); b != -1; b = rest.read()) {
                        bytes.append(b);
                    }
                    return bytes.bytes();
                }
            };

    assertActionShouldProduce("222\n333333\n", drainRest);
}
/**
 * Reads up to {@code expected.length()} characters from {@code src}, stopping early at
 * EOF, and asserts via {@code assertActionShouldProduce} that they match {@code expected}.
 *
 * @param src source to read from
 * @param expected the text the reads should yield
 */
private void assertReadShouldProduce(LexerSource src, String expected) {
    final int wanted = expected.length();
    final ByteList collected = new ByteList(wanted);

    int consumed = 0;
    while (consumed < wanted) {
        try {
            final int next = src.read();
            if (next == RubyLexer.EOF) {
                break;
            }
            collected.append(next);
        } catch (IOException ex) {
            fail(ex.getMessage());
        }
        consumed++;
    }

    final Callable<byte[]> collectedBytes =
            new Callable<byte[]>() {
                public byte[] call() throws Exception {
                    return collected.bytes();
                }
            };

    assertActionShouldProduce(expected, collectedBytes);
}
/** Checks both begin-of-line flags after the first character of the input has been read. */
public void testWasBeginOfLineBehaviorAtBeginningOfInput() throws IOException {
    final LexerSource source = newSource("=begin");

    source.read();

    assertFalse(source.lastWasBeginOfLine());
    assertTrue(source.wasBeginOfLine());
}
public int parseString(RubyYaccLexer lexer, LexerSource src) throws java.io.IOException { boolean spaceSeen = false; int c; // FIXME: How much more obtuse can this be? // Heredoc already parsed this and saved string...Do not parse..just return if (flags == -1) { lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } c = src.read(); if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) { do { c = src.read(); } while (Character.isWhitespace(c)); spaceSeen = true; } if (c == end && nest == 0) { if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { flags = -1; lexer.getPosition(); return ' '; } if ((flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { lexer.setValue( new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src))); return Tokens.tREGEXP_END; } lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } if (spaceSeen) { src.unread(c); lexer.getPosition(); return ' '; } // Single-quote fast path if (begin == '\0' && flags == 0) { ByteList buffer = new ByteList(); src.unread(c); if (parseSimpleStringIntoBuffer(src, buffer) == RubyYaccLexer.EOF) { throw new SyntaxException( PID.STRING_HITS_EOF, src.getPosition(), src.getCurrentLine(), "unterminated string meets end of file"); } /* ByteList buffer; src.unread(c); if ((buffer = src.readUntil(end)) == null) { throw new SyntaxException(src.getPosition(), "unterminated string meets end of file"); } */ lexer.setValue(new StrNode(lexer.getPosition(), buffer)); return Tokens.tSTRING_CONTENT; } ByteList buffer = new ByteList(); if ((flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') { c = src.read(); switch (c) { case '$': case '@': src.unread(c); lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DVAR; case '{': lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DBEG; } buffer.append((byte) '#'); } src.unread(c); if (parseStringIntoBuffer(lexer, src, buffer) == 
RubyYaccLexer.EOF) { throw new SyntaxException( PID.STRING_HITS_EOF, src.getPosition(), src.getCurrentLine(), "unterminated string meets end of file"); } lexer.setValue(new StrNode(lexer.getPosition(), buffer)); return Tokens.tSTRING_CONTENT; }
/**
 * Reads the character(s) following a backslash and copies the escape sequence into
 * {@code buffer} in its source form (backslash included), without evaluating it.
 *
 * <p>Handled forms: a newline (dropped), octal (up to three digits), hex
 * ({@code \x} plus one or two digits), {@code \M-}, {@code \C-}, {@code \c}, and any
 * other character.
 *
 * @param src source positioned just after the backslash
 * @param buffer receives the escape text
 * @throws SyntaxException on EOF, a non-hex character after {@code \x}, or a missing
 *     {@code '-'} after {@code \M} / {@code \C}
 * @throws java.io.IOException if reading from {@code src} fails
 */
private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    final int first = src.read();

    if (first == '\n') {
        return; /* just ignore */
    }

    if (first == RubyYaccLexer.EOF) {
        throw invalidEscapeSyntax(src);
    }

    if (first >= '0' && first <= '7') {
        /* octal constant */
        buffer.append('\\');
        buffer.append(first);
        int digitsLeft = 2;
        while (digitsLeft > 0) {
            final int digit = src.read();
            if (digit == RubyYaccLexer.EOF) {
                throw invalidEscapeSyntax(src);
            }
            if (!RubyYaccLexer.isOctChar(digit)) {
                src.unread(digit);
                break;
            }
            buffer.append(digit);
            digitsLeft--;
        }
        return;
    }

    if (first == 'x') {
        /* hex constant */
        buffer.append('\\');
        buffer.append(first);

        final int firstDigit = src.read();
        if (!RubyYaccLexer.isHexChar(firstDigit)) {
            throw invalidEscapeSyntax(src);
        }
        buffer.append(firstDigit);

        final int secondDigit = src.read();
        if (RubyYaccLexer.isHexChar(secondDigit)) {
            buffer.append(secondDigit);
        } else {
            src.unread(secondDigit);
        }
        return;
    }

    if (first == 'M' || first == 'C') {
        if (src.read() != '-') {
            throw invalidEscapeSyntax(src);
        }
        buffer.append(new byte[] {'\\', (byte) first, '-'});
        escaped(src, buffer);
        return;
    }

    if (first == 'c') {
        buffer.append(new byte[] {'\\', 'c'});
        escaped(src, buffer);
        return;
    }

    // The backslash is dropped only when the character is a backslash that is also the terminator.
    if (!(first == '\\' && first == end)) {
        buffer.append('\\');
    }
    buffer.append(first);
}

/** Builds the exception reported for a malformed escape at the current position of {@code src}. */
private SyntaxException invalidEscapeSyntax(LexerSource src) {
    return new SyntaxException(
            PID.INVALID_ESCAPE_SYNTAX,
            src.getPosition(),
            src.getCurrentLine(),
            "Invalid escape character syntax");
}
/**
 * Copies string content from {@code src} into {@code buffer} until something stops the
 * scan: the unnested terminator, the start of an interpolation ({@code #$}, {@code #@},
 * <code>#{</code>) when expansion is enabled, unescaped whitespace in a word list, or EOF.
 * Whatever stopped the scan (other than EOF) is pushed back onto {@code src}.
 *
 * <p>Nesting of {@code begin}/{@code end} delimiters is tracked in {@code nest}.
 * Backslash sequences are handled according to the {@code STR_FUNC_*} bits in
 * {@code flags}.
 *
 * @param lexer lexer used to evaluate escapes when expansion is enabled
 * @param src source to read from
 * @param buffer receives the string contents
 * @return the last character read, or {@code RubyYaccLexer.EOF}
 * @throws java.io.IOException if reading from {@code src} fails
 */
public int parseStringIntoBuffer(RubyYaccLexer lexer, LexerSource src, ByteList buffer) throws java.io.IOException {
    final boolean wordList = (flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0;
    final boolean interpolates = (flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0;
    final boolean keepsEscapes = (flags & RubyYaccLexer.STR_FUNC_ESCAPE) != 0;
    final boolean inRegexp = (flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0;

    int c;
    for (c = src.read(); c != RubyYaccLexer.EOF; c = src.read()) {
        if (begin != '\0' && c == begin) {
            nest++;
        } else if (c == end) {
            if (nest == 0) {
                src.unread(c);
                break;
            }
            nest--;
        } else if (c == '#' && interpolates && !src.peek('\n')) {
            final int lookahead = src.read();
            final boolean startsInterpolation =
                    lookahead == '$' || lookahead == '@' || lookahead == '{';
            src.unread(lookahead);
            if (startsInterpolation) {
                // Leave the whole "#x" sequence for the caller.
                src.unread(c);
                break;
            }
        } else if (c == '\\') {
            c = src.read();
            if (c == '\n') {
                if (!wordList) {
                    if (interpolates) {
                        continue; // backslash-newline is dropped entirely
                    }
                    buffer.append('\\');
                }
            } else if (c == '\\') {
                if (keepsEscapes) {
                    buffer.append(c);
                }
            } else if (inRegexp) {
                src.unread(c);
                parseEscapeIntoBuffer(src, buffer);
                continue;
            } else if (interpolates) {
                src.unread(c);
                if (keepsEscapes) {
                    buffer.append('\\');
                }
                c = lexer.readEscape();
            } else if (wordList && Character.isWhitespace(c)) {
                /* ignore backslashed spaces in %w */
            } else if (c != end && (begin == '\0' || c != begin)) {
                buffer.append('\\');
            }
        } else if (wordList && Character.isWhitespace(c)) {
            src.unread(c);
            break;
        }

        buffer.append(c);
    }

    return c;
}
/** Unreads a mix of new text and the character just read, then checks what is read back. */
public void testUnreadBothNewAndExistingCharacters() throws IOException {
    final LexerSource source = newSource("abc");

    source.read();
    source.unreadMany("123a");

    assertReadShouldProduce(source, "123abc");
}
/** Checks that a successful {@code peek} is followed by a {@code read} of the same character. */
public void testPeekGivesTheNextCharaterWithoutAdvancing() throws IOException {
    final LexerSource source = newSource("abc");

    assertTrue(source.peek('a'));
    assertEquals('a', source.read());
}