String parse(String string) { int length = string.length(); offsets = new Point[length / 4]; ids = new String[length / 4]; mnemonics = new int[length / 4 + 1]; StringBuffer result = new StringBuffer(); char[] buffer = new char[length]; string.getChars(0, string.length(), buffer, 0); int index = 0, state = 0, linkIndex = 0; int start = 0, tagStart = 0, linkStart = 0, endtagStart = 0, refStart = 0; while (index < length) { char c = Character.toLowerCase(buffer[index]); switch (state) { case 0: if (c == '<') { tagStart = index; state++; } break; case 1: if (c == 'a') state++; break; case 2: switch (c) { case 'h': state = 7; break; case '>': linkStart = index + 1; state++; break; default: if (Character.isWhitespace(c)) break; else state = 13; } break; case 3: if (c == '<') { endtagStart = index; state++; } break; case 4: state = c == '/' ? state + 1 : 3; break; case 5: state = c == 'a' ? state + 1 : 3; break; case 6: if (c == '>') { mnemonics[linkIndex] = parseMnemonics(buffer, start, tagStart, result); int offset = result.length(); parseMnemonics(buffer, linkStart, endtagStart, result); offsets[linkIndex] = new Point(offset, result.length() - 1); if (ids[linkIndex] == null) { ids[linkIndex] = new String(buffer, linkStart, endtagStart - linkStart); } linkIndex++; start = tagStart = linkStart = endtagStart = refStart = index + 1; state = 0; } else { state = 3; } break; case 7: state = c == 'r' ? state + 1 : 0; break; case 8: state = c == 'e' ? state + 1 : 0; break; case 9: state = c == 'f' ? state + 1 : 0; break; case 10: state = c == '=' ? state + 1 : 0; break; case 11: if (c == '"') { state++; refStart = index + 1; } else { state = 0; } break; case 12: if (c == '"') { ids[linkIndex] = new String(buffer, refStart, index - refStart); state = 2; } break; case 13: if (Character.isWhitespace(c)) { state = 0; } else if (c == '=') { state++; } break; case 14: state = c == '"' ? state + 1 : 0; break; case 15: if (c == '"') state = 2; break; default: state = 0; break; } index++; } if (start < length) { int tmp = parseMnemonics(buffer, start, tagStart, result); int mnemonic = parseMnemonics(buffer, Math.max(tagStart, linkStart), length, result); if (mnemonic == -1) mnemonic = tmp; mnemonics[linkIndex] = mnemonic; } else { mnemonics[linkIndex] = -1; } if (offsets.length != linkIndex) { Point[] newOffsets = new Point[linkIndex]; System.arraycopy(offsets, 0, newOffsets, 0, linkIndex); offsets = newOffsets; String[] newIDs = new String[linkIndex]; System.arraycopy(ids, 0, newIDs, 0, linkIndex); ids = newIDs; int[] newMnemonics = new int[linkIndex + 1]; System.arraycopy(mnemonics, 0, newMnemonics, 0, linkIndex + 1); mnemonics = newMnemonics; } return result.toString(); }
/** Returns the next lexical token in the document. */ public int nextToken() { int c; fStartToken = fPos; while (true) { switch (c = read()) { case EOF: return EOF; case '/': // comment c = read(); if (c == '/') { while (true) { c = read(); if ((c == EOF) || (c == EOL)) { unread(c); return COMMENT; } } } else { unread(c); } return OTHER; case '\'': // char const character: for (; ; ) { c = read(); switch (c) { case '\'': return STRING; case EOF: unread(c); return STRING; case '\\': c = read(); break; } } case '"': // string string: for (; ; ) { c = read(); switch (c) { case '"': return STRING; case EOF: unread(c); return STRING; case '\\': c = read(); break; } } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': do { c = read(); } while (Character.isDigit((char) c)); unread(c); return NUMBER; default: if (Character.isWhitespace((char) c)) { do { c = read(); } while (Character.isWhitespace((char) c)); unread(c); return WHITE; } if (Character.isJavaIdentifierStart((char) c)) { fBuffer.setLength(0); do { fBuffer.append((char) c); c = read(); } while (Character.isJavaIdentifierPart((char) c)); unread(c); Integer i = (Integer) fgKeys.get(fBuffer.toString()); if (i != null) return i.intValue(); return WORD; } return OTHER; } } }