/**
 * Builds a <code>DictionaryNameFactory</code> from the identifiers found in a file.
 *
 * <p>The file is scanned one character at a time. Every maximal run of characters that forms a
 * Java identifier is recorded as a name; a name that was already recorded is not added again.
 * Any other character acts as a separator, and everything from a
 * <code>COMMENT_CHARACTER</code> up to the next line break (or the end of the file) is skipped.
 *
 * @param file the file from which the names can be read.
 * @param nameFactory the name factory from which names will be retrieved if the list of read
 *     names has been exhausted.
 * @throws IOException if the file cannot be opened or read.
 */
public DictionaryNameFactory(File file, NameFactory nameFactory) throws IOException {
  this.names = new ArrayList();
  this.nameFactory = nameFactory;

  Reader reader = new FileReader(file);
  try {
    StringBuffer identifier = new StringBuffer();
    int ch;
    do {
      ch = reader.read();

      // Does this character extend (or start) the identifier being collected?
      boolean extendsIdentifier = false;
      if (ch != -1) {
        if (identifier.length() == 0) {
          extendsIdentifier = Character.isJavaIdentifierStart((char) ch);
        } else {
          extendsIdentifier = Character.isJavaIdentifierPart((char) ch);
        }
      }
      if (extendsIdentifier) {
        identifier.append((char) ch);
        continue;
      }

      // A separator (or the end of the file) terminates the pending identifier, if any.
      if (identifier.length() > 0) {
        String name = identifier.toString();
        if (!names.contains(name)) {
          names.add(name);
        }
        identifier.setLength(0);
      }

      // A comment runs to the end of the line; swallow it.
      if (ch == COMMENT_CHARACTER) {
        ch = reader.read();
        while (ch != -1 && ch != '\n' && ch != '\r') {
          ch = reader.read();
        }
      }
    } while (ch != -1);
  } finally {
    reader.close();
  }
}
/**
 * Turns an arbitrary string into one that contains only characters permitted in a Java
 * identifier, by replacing every offending character with an underscore.
 *
 * <p>The string is examined one Unicode code point at a time, so supplementary characters
 * (those encoded as a surrogate pair) that are legal in identifiers are kept intact instead of
 * being mistaken for two illegal characters. An illegal code point is replaced by one
 * underscore per UTF-16 code unit it occupies, so the result always has the same length as the
 * input.
 *
 * <p>An empty input yields an empty string, which is not itself a valid identifier. Reserved
 * words are not altered.
 *
 * @param candidateID the text to sanitize; must not be {@code null}.
 * @return the sanitized text, of the same length as {@code candidateID}.
 * @throws NullPointerException if {@code candidateID} is {@code null}.
 */
public static String getJavaIdentifier(String candidateID) {
  int len = candidateID.length();
  StringBuilder buf = new StringBuilder(len);
  int i = 0;
  while (i < len) {
    int codePoint = candidateID.codePointAt(i);
    int width = Character.charCount(codePoint);
    // Only the very first character is subject to the stricter "start" rule.
    boolean good =
        i == 0
            ? Character.isJavaIdentifierStart(codePoint)
            : Character.isJavaIdentifierPart(codePoint);
    if (good) {
      buf.appendCodePoint(codePoint);
    } else {
      // One underscore per code unit keeps the output length equal to the input length.
      for (int unit = 0; unit < width; unit++) {
        buf.append('_');
      }
    }
    i += width;
  }
  return buf.toString();
}
/**
 * Tells whether a string is made up solely of characters that may form a Java identifier.
 *
 * <p>The first character must satisfy {@link Character#isJavaIdentifierStart(int)} and every
 * following one {@link Character#isJavaIdentifierPart(int)}. The string is examined one Unicode
 * code point at a time, so identifiers containing supplementary characters (encoded as
 * surrogate pairs) are recognized correctly. The check is purely character-based: reserved
 * words such as {@code class} are not rejected.
 *
 * @param id the string to test; may be {@code null}.
 * @return {@code true} if {@code id} is non-null, non-empty and consists only of identifier
 *     characters; {@code false} otherwise.
 */
public static boolean isJavaIdentifier(String id) {
  if (id == null || id.length() == 0) {
    return false;
  }

  int first = id.codePointAt(0);
  if (!Character.isJavaIdentifierStart(first)) {
    return false;
  }

  int len = id.length();
  int i = Character.charCount(first);
  while (i < len) {
    int codePoint = id.codePointAt(i);
    if (!Character.isJavaIdentifierPart(codePoint)) {
      return false;
    }
    i += Character.charCount(codePoint);
  }
  return true;
}
/**
 * Reads an identifier-shaped symbol from <code>s</code>, starting at <code>index</code>, and
 * leaves <code>index</code> on the first character that is not part of it.
 *
 * <p><code>abortPrefetch()</code> is invoked when the first character examined cannot start a
 * Java identifier, or when the input is exhausted before any character has been collected.
 * NOTE(review): abortPrefetch() presumably does not return normally - confirm; if it does
 * return, scanning simply carries on as the code below shows.
 *
 * @return the characters collected for the symbol.
 */
String readSymbol() {
  StringBuffer symbol = new StringBuffer();

  for (; index < s.length(); index++) {
    char current = s.charAt(index);
    boolean atStart = symbol.length() == 0;

    if (atStart) {
      if (!Character.isJavaIdentifierStart(current)) {
        abortPrefetch();
      }
    } else if (!Character.isJavaIdentifierPart(current)) {
      // The symbol ends here; index stays on the terminating character.
      return symbol.toString();
    }

    symbol.append(current);
  }

  // Ran off the end of the input.
  if (symbol.length() == 0) {
    abortPrefetch();
  }
  return symbol.toString();
}
/** Returns the next lexical token in the document. */ public int nextToken() { int c; fStartToken = fPos; while (true) { switch (c = read()) { case EOF: return EOF; case '/': // comment c = read(); if (c == '/') { while (true) { c = read(); if ((c == EOF) || (c == EOL)) { unread(c); return COMMENT; } } } else { unread(c); } return OTHER; case '\'': // char const character: for (; ; ) { c = read(); switch (c) { case '\'': return STRING; case EOF: unread(c); return STRING; case '\\': c = read(); break; } } case '"': // string string: for (; ; ) { c = read(); switch (c) { case '"': return STRING; case EOF: unread(c); return STRING; case '\\': c = read(); break; } } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': do { c = read(); } while (Character.isDigit((char) c)); unread(c); return NUMBER; default: if (Character.isWhitespace((char) c)) { do { c = read(); } while (Character.isWhitespace((char) c)); unread(c); return WHITE; } if (Character.isJavaIdentifierStart((char) c)) { fBuffer.setLength(0); do { fBuffer.append((char) c); c = read(); } while (Character.isJavaIdentifierPart((char) c)); unread(c); Integer i = (Integer) fgKeys.get(fBuffer.toString()); if (i != null) return i.intValue(); return WORD; } return OTHER; } } }
public static void main(String args[]) throws FileNotFoundException, IOException { System.out.println("Gathering data..."); // Use char[] for ease in building strings, despite only using 8 bits. int numElements = 65536; char[] info = new char[numElements]; for (int i = 0; i < info.length; i++) { if (i == '\n' || i == '\r') info[i] = NEWLINE_CODE; else if (i == ' ' || i == '\t' || i == '\f') info[i] = SPACE_CODE; else if (i < 128 && Character.isLowerCase((char) i)) info[i] = LOWER_CODE; // Ascii lower case else if (i < 128 && Character.isUpperCase((char) i)) info[i] = UPPER_CODE; // Ascii upper case else if (i < 128 && Character.isDigit((char) i)) info[i] = DIGIT_CODE; // Ascii digit else if (Character.isJavaIdentifierStart((char) i)) info[i] = OTHER_LETTER_CODE; else if (Character.isJavaIdentifierPart((char) i)) info[i] = OTHER_DIGIT_CODE; else { info[i] = BAD_CODE; numElements--; } } System.out.println("Compressing tables..."); int bestShift = 0; int bestEst = info.length; String bestBlkStr = null; for (int i = 3; i < 11; i++) { int blkSize = 1 << i; Map blocks = new HashMap(); List blkArray = new ArrayList(); System.out.print("shift: " + i); for (int j = 0; j < info.length; j += blkSize) { String key = new String(info, j, blkSize); if (blocks.get(key) == null) { blkArray.add(key); blocks.put(key, new Integer(blkArray.size())); } } int blkNum = blkArray.size(); int blockLen = blkNum * blkSize; System.out.print(" before " + blockLen); // // Try to pack blkArray, by finding successively smaller matches // between heads and tails of blocks. 
// for (int j = blkSize - 1; j > 0; j--) { Map tails = new HashMap(); for (int k = 0; k < blkArray.size(); k++) { String str = (String) blkArray.get(k); if (str == null) continue; String tail = str.substring(str.length() - j); List l = (List) tails.get(tail); if (l == null) tails.put(tail, new LinkedList(Collections.singleton(new Integer(k)))); else l.add(new Integer(k)); } // // Now calculate the heads, and merge overlapping blocks // block: for (int k = 0; k < blkArray.size(); k++) { String tomerge = (String) blkArray.get(k); if (tomerge == null) continue; while (true) { String head = tomerge.substring(0, j); LinkedList entry = (LinkedList) tails.get(head); if (entry == null) continue block; Integer other = (Integer) entry.removeFirst(); if (other.intValue() == k) { if (entry.size() > 0) { entry.add(other); other = (Integer) entry.removeFirst(); } else { entry.add(other); continue block; } } if (entry.size() == 0) tails.remove(head); // // A match was found. // String merge = blkArray.get(other.intValue()) + tomerge.substring(j); blockLen -= j; blkNum--; if (other.intValue() < k) { blkArray.set(k, null); blkArray.set(other.intValue(), merge); String tail = merge.substring(merge.length() - j); List l = (List) tails.get(tail); Collections.replaceAll(l, new Integer(k), other); continue block; } blkArray.set(k, merge); blkArray.set(other.intValue(), null); tomerge = merge; } } } StringBuffer blockStr = new StringBuffer(blockLen); for (int k = 0; k < blkArray.size(); k++) { String str = (String) blkArray.get(k); if (str != null) blockStr.append(str); } if (blockStr.length() != blockLen) throw new Error("Unexpected blockLen " + blockLen); int estimate = blockLen + (info.length >> (i - 1)); System.out.println(" after merge " + blockLen + ": " + estimate + " bytes"); if (estimate < bestEst) { bestEst = estimate; bestShift = i; bestBlkStr = blockStr.toString(); } } int blkSize = 1 << bestShift; char[] blocks = new char[info.length / blkSize]; for (int j = 0; j < 
info.length; j += blkSize) { String key = new String(info, j, blkSize); int index = bestBlkStr.indexOf(key); if (index == -1) throw new Error("Unexpected index for " + j); blocks[j >> bestShift] = (char) (index - j); } // // Process the code.h file // System.out.println("Generating code.h with shift of " + bestShift); PrintStream hfile = new PrintStream(new FileOutputStream("code.h")); printHeader(hfile, new String[] {"\"platform.h\""}); hfile.println("#ifndef code_INCLUDED"); hfile.println("#define code_INCLUDED"); hfile.println(); hfile.println("class Code"); hfile.println("{"); hfile.println(" //"); hfile.println(" // To facilitate the scanning, the character set is partitioned into"); hfile.println(" // 8 categories using the array CODE. These are described below"); hfile.println(" // together with some self-explanatory functions defined on CODE."); hfile.println(" //"); hfile.println(" enum {"); hfile.println(" SHIFT = " + bestShift + ","); hfile.println(" NEWLINE_CODE = " + NEWLINE_CODE + ','); hfile.println(" SPACE_CODE = " + SPACE_CODE + ','); hfile.println(" BAD_CODE = " + BAD_CODE + ','); hfile.println(" DIGIT_CODE = " + DIGIT_CODE + ','); hfile.println(" OTHER_DIGIT_CODE = " + OTHER_DIGIT_CODE + ','); hfile.println(" LOWER_CODE = " + LOWER_CODE + ','); hfile.println(" UPPER_CODE = " + UPPER_CODE + ','); hfile.println(" OTHER_LETTER_CODE = " + OTHER_LETTER_CODE); hfile.println(" };"); hfile.println(); hfile.println(" static char codes[" + bestBlkStr.length() + "];"); hfile.println(" static u2 blocks[" + blocks.length + "];"); hfile.println(); hfile.println(); hfile.println("public:"); hfile.println(); hfile.println(" static inline void SetBadCode(wchar_t c)"); hfile.println(" {"); hfile.println(" codes[(u2) (blocks[c >> SHIFT] + c)] = BAD_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline void CodeCheck(wchar_t c)"); hfile.println(" {"); hfile.println(" assert((u2) (blocks[c >> SHIFT] + c) < " + bestBlkStr.length() + ");"); 
hfile.println(" }"); hfile.println(); hfile.println(" static inline bool CodeCheck(void)"); hfile.println(" {"); hfile.println(" for (int i = 0; i <= 0xffff; i++)"); hfile.println(" CodeCheck((wchar_t) i);"); hfile.println(" return true;"); hfile.println(" }"); hfile.println(); hfile.println(" //"); hfile.println(" // \\r characters are replaced by \\x0a in Stream::ProcessInput()."); hfile.println(" //"); hfile.println(" static inline bool IsNewline(wchar_t c)"); hfile.println(" {"); hfile.println(" return c == '\\x0a';"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsSpaceButNotNewline(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] == SPACE_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsSpace(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] <= SPACE_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsDigit(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] == DIGIT_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsOctalDigit(wchar_t c)"); hfile.println(" {"); hfile.println(" return c >= U_0 && c <= U_7;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsHexDigit(wchar_t c)"); hfile.println(" {"); hfile.println(" return c <= U_f && (c >= U_a ||"); hfile.println(" (c >= U_A && c <= U_F) ||"); hfile.println(" (c >= U_0 && c <= U_9));"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsUpper(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] == UPPER_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsLower(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] == LOWER_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" 
static inline bool IsAlpha(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] >= LOWER_CODE;"); hfile.println(" }"); hfile.println(); hfile.println(" static inline bool IsAlnum(wchar_t c)"); hfile.println(" {"); hfile.println(" return codes[(u2) (blocks[c >> SHIFT] + c)] >= DIGIT_CODE;"); hfile.println(" }"); hfile.println(); hfile.println("};"); hfile.println(); hfile.println("#endif // code_INCLUDED"); printFooter(hfile); hfile.close(); // // Process the code.cpp file // System.out.println("Generating code.cpp"); PrintStream cfile = new PrintStream(new FileOutputStream("code.cpp")); printHeader(cfile, new String[] {"\"code.h\""}); cfile.println("char Code::codes[" + bestBlkStr.length() + "] ="); cfile.println("{"); for (int j = 0; j < bestBlkStr.length(); j += 4) { for (int k = 0; k < 4; k++) { if (k + j >= bestBlkStr.length()) break; if (k == 0) cfile.print(" "); cfile.print(" " + CODE_NAMES[bestBlkStr.charAt(k + j)] + ","); } cfile.println(); } cfile.println("};"); cfile.println(); cfile.println(); cfile.println("//"); cfile.println("// The Blocks vector:"); cfile.println("//"); cfile.println("u2 Code::blocks[" + blocks.length + "] ="); cfile.println("{"); for (int k = 0; k < blocks.length; k += 9) { for (int i = 0; i < 9; i++) { if (k + i >= blocks.length) break; if (i == 0) cfile.print(" "); cfile.print(" 0x" + Integer.toHexString(blocks[k + i]) + ","); } cfile.println(); } cfile.println("};"); printFooter(cfile); cfile.close(); // // Print statistics. // System.out.println( "Total static storage utilization is " + blocks.length * 2 + " bytes for block lookup"); System.out.println(" plus " + bestBlkStr.length() + " bytes for the encodings"); System.out.println( "The number of unicode characters legal in Java sourcecode is " + numElements); }