private int parseRegexpFlags(final LexerSource src) throws java.io.IOException { char kcode = 0; int options = 0; int c; StringBuilder unknownFlags = new StringBuilder(10); for (c = src.read(); c != RubyYaccLexer.EOF && Character.isLetter(c); c = src.read()) { switch (c) { case 'i': options |= ReOptions.RE_OPTION_IGNORECASE; break; case 'x': options |= ReOptions.RE_OPTION_EXTENDED; break; case 'm': options |= ReOptions.RE_OPTION_MULTILINE; break; case 'o': options |= ReOptions.RE_OPTION_ONCE; break; case 'n': kcode = 16; break; case 'e': kcode = 32; break; case 's': kcode = 48; break; case 'u': kcode = 64; break; case 'j': options |= 256; // Regexp engine 'java' break; default: unknownFlags.append((char) c); break; } } src.unread(c); if (unknownFlags.length() != 0) { throw new SyntaxException( PID.REGEXP_UNKNOWN_OPTION, src.getPosition(), "unknown regexp option" + (unknownFlags.length() > 1 ? "s" : "") + " - " + unknownFlags.toString(), unknownFlags.toString()); } return options | kcode; }
/**
 * Copies characters from {@code src} into {@code buffer} until the terminator
 * {@code end} or end of input is reached.
 *
 * <p>A backslash is dropped only when it precedes another backslash, or precedes the
 * terminator (provided that character is not a newline); in every other case the
 * backslash is kept in front of the character it precedes. The terminator itself is
 * pushed back onto {@code src} rather than consumed.
 *
 * @param src source to read from
 * @param buffer destination for the string contents
 * @return the terminator if it was found, otherwise {@code RubyYaccLexer.EOF}
 * @throws java.io.IOException if reading from {@code src} fails
 */
public int parseSimpleStringIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    int ch = src.read();

    while (ch != RubyYaccLexer.EOF) {
        if (ch == end) {
            // Leave the terminator for the caller to consume.
            src.unread(ch);
            return ch;
        }

        if (ch == '\\') {
            ch = src.read();
            boolean dropBackslash = ch == '\\' || (ch != '\n' && ch == end);
            if (!dropBackslash) {
                buffer.append('\\');
            }
        }

        buffer.append(ch);
        ch = src.read();
    }

    return ch;
}
/**
 * Reads part of a source, pushes the last character back with {@code unread}, and
 * checks that the pushed-back character leads the remaining input.
 */
public void testReadUnreadReadShouldProduceCorrectSequence() {
    final LexerSource source = newSource("abcd1234");

    // First pass over the leading four characters.
    assertReadShouldProduce(source, "abcd");

    // Hand the most recently read character back to the source.
    source.unread('d');

    // The pushed-back 'd' is expected in front of the untouched remainder.
    assertReadShouldProduce(source, "d1234");
}
public int parseString(RubyYaccLexer lexer, LexerSource src) throws java.io.IOException { boolean spaceSeen = false; int c; // FIXME: How much more obtuse can this be? // Heredoc already parsed this and saved string...Do not parse..just return if (flags == -1) { lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } c = src.read(); if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0 && Character.isWhitespace(c)) { do { c = src.read(); } while (Character.isWhitespace(c)); spaceSeen = true; } if (c == end && nest == 0) { if ((flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0) { flags = -1; lexer.getPosition(); return ' '; } if ((flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0) { lexer.setValue( new RegexpNode(src.getPosition(), ByteList.create(""), parseRegexpFlags(src))); return Tokens.tREGEXP_END; } lexer.setValue(new Token("\"", lexer.getPosition())); return Tokens.tSTRING_END; } if (spaceSeen) { src.unread(c); lexer.getPosition(); return ' '; } // Single-quote fast path if (begin == '\0' && flags == 0) { ByteList buffer = new ByteList(); src.unread(c); if (parseSimpleStringIntoBuffer(src, buffer) == RubyYaccLexer.EOF) { throw new SyntaxException( PID.STRING_HITS_EOF, src.getPosition(), src.getCurrentLine(), "unterminated string meets end of file"); } /* ByteList buffer; src.unread(c); if ((buffer = src.readUntil(end)) == null) { throw new SyntaxException(src.getPosition(), "unterminated string meets end of file"); } */ lexer.setValue(new StrNode(lexer.getPosition(), buffer)); return Tokens.tSTRING_CONTENT; } ByteList buffer = new ByteList(); if ((flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0 && c == '#') { c = src.read(); switch (c) { case '$': case '@': src.unread(c); lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DVAR; case '{': lexer.setValue(new Token("#" + c, lexer.getPosition())); return Tokens.tSTRING_DBEG; } buffer.append((byte) '#'); } src.unread(c); if (parseStringIntoBuffer(lexer, src, buffer) == 
RubyYaccLexer.EOF) { throw new SyntaxException( PID.STRING_HITS_EOF, src.getPosition(), src.getCurrentLine(), "unterminated string meets end of file"); } lexer.setValue(new StrNode(lexer.getPosition(), buffer)); return Tokens.tSTRING_CONTENT; }
/**
 * Copies one escape sequence from {@code src} into {@code buffer} in its source form;
 * the backslash that introduced it has already been consumed by the caller.
 *
 * <p>Handled forms: an escaped newline (contributes nothing), one to three octal digits,
 * {@code x} followed by one or two hex digits, {@code M-} and {@code C-} prefixes,
 * and the {@code c} prefix. The prefixed forms delegate the remainder to {@code escaped}.
 * Any other character is copied after a backslash.
 *
 * @param src source positioned just after the backslash
 * @param buffer destination for the escape text
 * @throws java.io.IOException if reading from {@code src} fails
 * @throws SyntaxException if the escape is cut short by end of input or is malformed
 */
private void parseEscapeIntoBuffer(LexerSource src, ByteList buffer) throws java.io.IOException {
    int c = src.read();

    switch (c) {
        case '\n':
            /* just ignore */
            return;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
            /* octal constant */
            buffer.append('\\');
            buffer.append(c);
            // At most two further octal digits belong to the escape.
            int remaining = 2;
            while (remaining-- > 0) {
                c = src.read();
                if (c == RubyYaccLexer.EOF) {
                    throw invalidEscapeSyntax(src);
                }
                if (!RubyYaccLexer.isOctChar(c)) {
                    src.unread(c);
                    break;
                }
                buffer.append(c);
            }
            return;
        }

        case 'x': {
            /* hex constant */
            buffer.append('\\');
            buffer.append(c);

            // The first hex digit is mandatory.
            int firstDigit = src.read();
            if (!RubyYaccLexer.isHexChar(firstDigit)) {
                throw invalidEscapeSyntax(src);
            }
            buffer.append(firstDigit);

            // The second one is optional; hand back anything else.
            int secondDigit = src.read();
            if (RubyYaccLexer.isHexChar(secondDigit)) {
                buffer.append(secondDigit);
            } else {
                src.unread(secondDigit);
            }
            return;
        }

        case 'M':
        case 'C': {
            // Both prefixes must be followed by '-'.
            final byte prefix = (byte) c;
            if (src.read() != '-') {
                throw invalidEscapeSyntax(src);
            }
            buffer.append(new byte[] {'\\', prefix, '-'});
            escaped(src, buffer);
            return;
        }

        case 'c':
            buffer.append(new byte[] {'\\', 'c'});
            escaped(src, buffer);
            return;

        case RubyYaccLexer.EOF:
            throw invalidEscapeSyntax(src);

        default:
            // NOTE(review): this is true for every c unless the terminator is itself a
            // backslash, so the backslash is effectively always kept. Kept as-is.
            if (!(c == '\\' && c == end)) {
                buffer.append('\\');
            }
            buffer.append(c);
    }
}

/** Builds the exception raised for a malformed or truncated escape at the current position. */
private SyntaxException invalidEscapeSyntax(LexerSource src) {
    return new SyntaxException(
            PID.INVALID_ESCAPE_SYNTAX,
            src.getPosition(),
            src.getCurrentLine(),
            "Invalid escape character syntax");
}
/**
 * Appends string content from {@code src} to {@code buffer} until something that must be
 * handled by the caller is reached.
 *
 * <p>The loop stops, with the stopping character(s) pushed back onto {@code src}, when it
 * reads:
 * <ul>
 *   <li>the terminator {@code end} while {@code nest} is zero;</li>
 *   <li>a {@code #} immediately followed by {@code $}, {@code @} or an opening brace, when
 *       the EXPAND flag is set (the check is skipped when {@code src.peek('\n')} is true);</li>
 *   <li>a whitespace character, when the QWORDS flag is set.</li>
 * </ul>
 * It also stops at end of input. A {@code begin} character (when {@code begin} is not
 * {@code '\0'}) increments {@code nest}; an {@code end} character at non-zero depth
 * decrements it; both are appended like ordinary content.
 *
 * <p>Backslash handling, by the character that follows the backslash:
 * <ul>
 *   <li>newline: QWORDS appends only the newline; otherwise EXPAND drops both characters;
 *       otherwise both are appended;</li>
 *   <li>backslash: two backslashes are appended when the ESCAPE flag is set, one otherwise;</li>
 *   <li>anything else: with REGEXP the character is pushed back and
 *       {@code parseEscapeIntoBuffer} copies the escape; with EXPAND it is pushed back, a
 *       backslash is appended if ESCAPE is set, and the value of {@code lexer.readEscape()}
 *       is appended; with QWORDS a whitespace character is appended without its backslash;
 *       otherwise the backslash is kept unless the character is {@code end} or a non-NUL
 *       {@code begin}.</li>
 * </ul>
 *
 * @param lexer lexer used to decode escapes in expanding strings
 * @param src source to read from
 * @param buffer destination for the content
 * @return the last value read: {@code RubyYaccLexer.EOF}, or the character that stopped the loop
 * @throws java.io.IOException if reading from {@code src} fails
 */
public int parseStringIntoBuffer(RubyYaccLexer lexer, LexerSource src, ByteList buffer) throws java.io.IOException { boolean qwords = (flags & RubyYaccLexer.STR_FUNC_QWORDS) != 0; boolean expand = (flags & RubyYaccLexer.STR_FUNC_EXPAND) != 0; boolean escape = (flags & RubyYaccLexer.STR_FUNC_ESCAPE) != 0; boolean regexp = (flags & RubyYaccLexer.STR_FUNC_REGEXP) != 0; int c; while ((c = src.read()) != RubyYaccLexer.EOF) { if (begin != '\0' && c == begin) { nest++; } else if (c == end) { if (nest == 0) { src.unread(c); break; } nest--; } else if (c == '#' && expand && !src.peek('\n')) { int c2 = src.read(); if (c2 == '$' || c2 == '@' || c2 == '{') { src.unread(c2); src.unread(c); break; } src.unread(c2); } else if (c == '\\') { c = src.read(); switch (c) { case '\n': if (qwords) break; if (expand) continue; buffer.append('\\'); break; case '\\': if (escape) buffer.append(c); break; default: if (regexp) { src.unread(c); parseEscapeIntoBuffer(src, buffer); continue; } else if (expand) { src.unread(c); if (escape) buffer.append('\\'); c = lexer.readEscape(); } else if (qwords && Character.isWhitespace(c)) { /* ignore backslashed spaces in %w */ } else if (c != end && !(begin != '\0' && c == begin)) { buffer.append('\\'); } } } else if (qwords && Character.isWhitespace(c)) { src.unread(c); break; } buffer.append(c); } return c; }