/** * build the token for operators (+ -) or separators (parens, braces) filter out comments which * begin with two slashes * * @param s is the String representing the token * @param startPosition is the column in the source file where the token begins * @param endPosition is the column in the source file where the token ends * @return the Token just found */ public Token makeToken(String s, int startPosition, int endPosition) { if (s.equals("//")) { // filter comment try { int oldLine = source.getLineno(); do { ch = source.read(); } while (oldLine == source.getLineno()); } catch (Exception e) { atEOF = true; } return nextToken(); } Symbol sym = Symbol.symbol(s, Tokens.BogusToken); // be sure it's a valid token if (sym == null) { System.out.println("******** illegal character: " + s + " " + "Line: " + source.getLineno()); atEOF = true; return nextToken(); } return new Token(startPosition, endPosition, sym); }
public Lexer(String sourceFile) throws Exception { new TokenType(); // init token table source = new SourceReader(sourceFile); ch = source.read(); }
/** @return the next Token found in the source file */ public Token nextToken() { // ch is always the next char to process if (atEOF) { if (source != null) { source.close(); source = null; } return null; } try { while (Character.isWhitespace(ch)) { // scan past whitespace ch = source.read(); } } catch (Exception e) { atEOF = true; return nextToken(); } startPosition = source.getPosition(); endPosition = startPosition - 1; if (Character.isJavaIdentifierStart(ch)) { // return tokens for ids and reserved words String id = ""; try { do { endPosition++; id += ch; ch = source.read(); } while (Character.isJavaIdentifierPart(ch)); } catch (Exception e) { atEOF = true; } return newIdToken(id, startPosition, endPosition); } // This block is a translation of state diagram that I drew // case number starts with digit if (Character.isDigit(ch)) { try { String number = ""; // Read the second item endPosition++; number += ch; ch = source.read(); // check if the second item is digit, decimal or neither // second item is digit if (Character.isDigit(ch)) { // move to section 1 then checke for decimal do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); if (ch == '.') { do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); return newFloatToken(number, startPosition, endPosition); } else { return newNumberToken(number, startPosition, endPosition); } } // second item is decimal else if (ch == '.') { // move to section 2 check for scientific notaiton do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); if (ch == 'E' || ch == 'e') { // move to scientific notation endPosition++; number += ch; ch = source.read(); if (ch == '+' || ch == '-') { endPosition++; number += ch; ch = source.read(); if (Character.isDigit(ch)) { do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); return newSciToken(number, startPosition, endPosition); } else { return newErrorToken(number, startPosition, endPosition); } } else if (Character.isDigit(ch)) { do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); return newSciToken(number, startPosition, endPosition); } else { return newErrorToken(number, startPosition, endPosition); } } else { return newFloatToken(number, startPosition, endPosition); } } // second item is none else { return newNumberToken(number, startPosition, endPosition); } } catch (IOException e) { e.printStackTrace(); } } // case number starts with decimal if (ch == '.') { try { String number = ""; endPosition++; number += ch; ch = source.read(); // digit followed by decimal if (Character.isDigit(ch)) { do { endPosition++; number += ch; ch = source.read(); } while (Character.isDigit(ch)); return newFloatToken(number, startPosition, endPosition); } else { return newErrorToken(number, startPosition, endPosition); } } catch (IOException e) { e.printStackTrace(); } } // Check for char, if it is followed by ' if (ch == '\'') { try { String character = ""; // Read the second item endPosition++; character += ch; ch = source.read(); if (ch == '\'') // quote followed by qoute is error { return newErrorToken(character, startPosition, endPosition); } else { endPosition++; character += ch; ch = source.read(); if (ch == '\'') // quote followed by a char followed by a qoute { endPosition++; character += ch; ch = source.read(); return newCharToken(character, startPosition, endPosition); } else // quote followed by no closing qoute after 1 letter throws an error token { return newErrorToken(character, startPosition, endPosition); } } } catch (IOException e) { e.printStackTrace(); } } // At this point the only tokens to check for are one or two // characters; we must also check for comments that begin with // 2 slashes String charOld = "" + ch; String op = charOld; Symbol sym; try { endPosition++; ch = source.read(); op += ch; // check if valid 2 char operator; if it's not in the symbol // table then don't insert it since we really have a one char // token sym = Symbol.symbol(op, Tokens.BogusToken); if (sym == null) { // it must be a one char token return makeToken(charOld, startPosition, endPosition); // return error token } endPosition++; ch = source.read(); return makeToken(op, startPosition, endPosition); } catch (Exception e) { } atEOF = true; if (startPosition == endPosition) { op = charOld; } return makeToken(op, startPosition, endPosition); }