private void createMarks(List<Mark> acc, Mark.ENTRY_PART part, String text, int firstMissing) { char[] chars = text.toCharArray(); int i = firstMissing; while ((i = editorFont.canDisplayUpTo(chars, i, chars.length)) != -1) { int cp = Character.codePointAt(chars, i); int start = i; i += Character.charCount(cp); Font font = FontFallbackManager.getCapableFont(cp); if (font == null) { continue; } // Look ahead to try to group as many characters as possible into this run. for (int cpn, ccn, j = i; j < chars.length; j += ccn) { cpn = Character.codePointAt(chars, j); ccn = Character.charCount(cpn); if (!editorFont.canDisplay(cpn) && font.canDisplay(cpn)) { i += ccn; } else { break; } } Mark m = new Mark(part, start, i); m.attributes = getAttributes(font); acc.add(m); } }
public int guessFullNameStyle(String name) { if (name == null) { return FullNameStyle.UNDEFINED; } int nameStyle = FullNameStyle.UNDEFINED; int length = name.length(); int offset = 0; while (offset < length) { int codePoint = Character.codePointAt(name, offset); if (Character.isLetter(codePoint)) { UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); if (!isLatinUnicodeBlock(unicodeBlock)) { if (isCJKUnicodeBlock(unicodeBlock)) { // We don't know if this is Chinese, Japanese or Korean - // trying to figure out by looking at other characters in the name return guessCJKNameStyle(name, offset + Character.charCount(codePoint)); } if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { return FullNameStyle.JAPANESE; } if (isKoreanUnicodeBlock(unicodeBlock)) { return FullNameStyle.KOREAN; } } nameStyle = FullNameStyle.WESTERN; } offset += Character.charCount(codePoint); } return nameStyle; }
/**
 * Guesses the phonetic-name style of {@code name} from the Unicode block of its first
 * recognizable letter.
 *
 * @param name the phonetic name to classify, may be {@code null}
 * @return {@code JAPANESE}, {@code KOREAN} or {@code PINYIN} for the first letter found in a
 *     Japanese phonetic, Korean or Latin block respectively; {@code UNDEFINED} when the name is
 *     {@code null} or contains no such letter
 */
public int guessPhoneticNameStyle(String name) {
  if (name == null) {
    return PhoneticNameStyle.UNDEFINED;
  }

  final int end = name.length();
  for (int pos = 0; pos < end; ) {
    final int cp = Character.codePointAt(name, pos);
    pos += Character.charCount(cp);
    if (!Character.isLetter(cp)) {
      continue;
    }

    final UnicodeBlock block = UnicodeBlock.of(cp);
    if (isJapanesePhoneticUnicodeBlock(block)) {
      return PhoneticNameStyle.JAPANESE;
    }
    if (isKoreanUnicodeBlock(block)) {
      return PhoneticNameStyle.KOREAN;
    }
    if (isLatinUnicodeBlock(block)) {
      return PhoneticNameStyle.PINYIN;
    }
  }
  return PhoneticNameStyle.UNDEFINED;
}
/** * Logically casts input to UTF32 ints then looks up the output or null if the input is not * accepted. FST must be INPUT_TYPE.BYTE4. */ public static <T> T get(FST<T> fst, CharSequence input) throws IOException { assert fst.inputType == FST.INPUT_TYPE.BYTE4; // TODO: would be nice not to alloc this on every lookup final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>()); int charIdx = 0; final int charLimit = input.length(); // Accumulate output as we go final T NO_OUTPUT = fst.outputs.getNoOutput(); T output = NO_OUTPUT; while (charIdx < charLimit) { final int utf32 = Character.codePointAt(input, charIdx); charIdx += Character.charCount(utf32); if (fst.findTargetArc(utf32, arc, arc) == null) { return null; } else if (arc.output != NO_OUTPUT) { output = fst.outputs.add(output, arc.output); } } if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) { return null; } else if (arc.output != NO_OUTPUT) { return fst.outputs.add(output, arc.output); } else { return output; } }
/**
 * Determines if a character sequence is a QName. A QName is either an NCName (LocalName), or an
 * NCName followed by a colon followed by another NCName (where the first NCName is referred to as
 * the 'Prefix Name' and the second NCName is referred to as the 'Local Name' - i.e.
 * PrefixName:LocalName).
 *
 * <p>A sequence that starts with a colon, ends with a colon, or contains more than one colon is
 * not a QName.
 *
 * @param s The character sequence to be tested.
 * @return {@code true} if {@code s} is a QName, otherwise {@code false}.
 */
public static boolean isQName(CharSequence s) {
  if (isNullOrEmpty(s)) {
    return false;
  }
  boolean foundColon = false;
  boolean inNCName = false;
  for (int i = 0; i < s.length(); ) {
    int codePoint = Character.codePointAt(s, i);
    if (codePoint == ':') {
      // Only one colon is allowed, and it must follow a non-empty prefix.
      if (foundColon || !inNCName) {
        return false;
      }
      foundColon = true;
      inNCName = false;
    } else if (!inNCName) {
      if (!isXMLNameStartCharacter(codePoint)) {
        return false;
      }
      inNCName = true;
    } else if (!isXMLNameChar(codePoint)) {
      return false;
    }
    i += Character.charCount(codePoint);
  }
  // Still outside an NCName here means the input ended with the colon ("prefix:"), which leaves
  // the local name empty and therefore is not a QName.
  return inNCName;
}
/**
 * Converts a string to its escaped form: every UTF-16 code unit becomes a backslash, the letter
 * {@code u} and four upper-case hexadecimal digits (for example the hiragana letter A, U+3042,
 * becomes the six characters backslash, u, 3, 0, 4, 2).
 *
 * <p>Characters outside the Basic Multilingual Plane are written as two escapes, one per
 * surrogate, so that every escape has exactly four digits.
 *
 * @param original the text to convert; must not be {@code null}
 * @return the escaped text, empty if {@code original} is empty
 */
public static String convertToUnicode(String original) {
  final int length = original.length();
  // Every code unit expands to exactly six characters.
  StringBuilder sb = new StringBuilder(length * 6);
  for (int i = 0; i < length; i++) {
    // Use the raw code unit: reading a whole code point here while stepping one char at a time
    // would print a 5-digit value for the high surrogate position and then the low surrogate
    // again.
    sb.append(String.format("\\u%04X", (int) original.charAt(i)));
  }
  return sb.toString();
}
/** * Escapes a character sequence so that it is valid XML. * * @param s The character sequence. * @return The escaped version of the character sequence. */ public static String escapeXML(CharSequence s) { // double quote -- quot // ampersand -- amp // less than -- lt // greater than -- gt // apostrophe -- apos StringBuilder sb = new StringBuilder(s.length() * 2); for (int i = 0; i < s.length(); ) { int codePoint = Character.codePointAt(s, i); if (codePoint == '<') { sb.append(LT); } else if (codePoint == '>') { sb.append(GT); } else if (codePoint == '\"') { sb.append(QUOT); } else if (codePoint == '&') { sb.append(AMP); } else if (codePoint == '\'') { sb.append(APOS); } else { sb.appendCodePoint(codePoint); } i += Character.charCount(codePoint); } return sb.toString(); }
@Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { char buffer[] = termAtt.buffer(); int length = termAtt.length(); for (int i = 0; i < length; i++) { int ch = Character.codePointAt(buffer, i, length); // look for digits outside of basic latin if (ch > 0x7F && Character.isDigit(ch)) { // replace with equivalent basic latin digit buffer[i] = (char) ('0' + Character.getNumericValue(ch)); // if the original was supplementary, shrink the string if (ch > 0xFFFF) { length = StemmerUtil.delete(buffer, i + 1, length); termAtt.setLength(length); } } } return true; } else { return false; } }
/**
 * Writes {@code str} to {@code _buf} as UTF-8 followed by a single zero byte.
 *
 * @param str the string to write
 * @return the number of bytes written, including the trailing zero byte
 */
protected int _put(String str) {
  final int charLength = str.length();
  int written = 0;
  int pos = 0;
  while (pos < charLength) {
    final int cp = Character.codePointAt(str, pos);
    pos += Character.charCount(cp);

    if (cp < 0x80) {
      // 1 byte: 0xxxxxxx
      _buf.write((byte) cp);
      written += 1;
    } else if (cp < 0x800) {
      // 2 bytes: 110xxxxx 10xxxxxx
      _buf.write((byte) (0xc0 | (cp >> 6)));
      _buf.write((byte) (0x80 | (cp & 0x3f)));
      written += 2;
    } else if (cp < 0x10000) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      _buf.write((byte) (0xe0 | (cp >> 12)));
      _buf.write((byte) (0x80 | ((cp >> 6) & 0x3f)));
      _buf.write((byte) (0x80 | (cp & 0x3f)));
      written += 3;
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      _buf.write((byte) (0xf0 | (cp >> 18)));
      _buf.write((byte) (0x80 | ((cp >> 12) & 0x3f)));
      _buf.write((byte) (0x80 | ((cp >> 6) & 0x3f)));
      _buf.write((byte) (0x80 | (cp & 0x3f)));
      written += 4;
    }
  }

  // terminating zero byte
  _buf.write((byte) 0);
  return written + 1;
}
/**
 * Determines if a character sequence is an NCName (Non-Colonised Name): an NCName start character
 * followed by zero or more NCName characters.
 *
 * @param s The character sequence to be tested.
 * @return {@code true} if {@code s} is an NCName, otherwise {@code false}.
 */
public static boolean isNCName(CharSequence s) {
  if (isNullOrEmpty(s)) {
    return false;
  }

  final int head = Character.codePointAt(s, 0);
  if (!isNCNameStartChar(head)) {
    return false;
  }

  int pos = Character.charCount(head);
  while (pos < s.length()) {
    final int cp = Character.codePointAt(s, pos);
    if (!isNCNameChar(cp)) {
      return false;
    }
    pos += Character.charCount(cp);
  }
  return true;
}
/**
 * Converts a string to a byte array with one byte per {@code char} position.
 *
 * <p>Each byte is the low eight bits of the code point that starts at that position, so the
 * conversion is only lossless for characters in the range 0-255.
 *
 * @param string the string to convert
 * @return a new array whose length equals {@code string.length()}
 */
private static byte[] stringToByteArray(String string) {
  final byte[] result = new byte[string.length()];
  int index = 0;
  while (index < result.length) {
    final int codePoint = Character.codePointAt(string, index);
    result[index] = (byte) codePoint;
    index++;
  }
  return result;
}
public void testRandomRealisticWhiteSpace() throws IOException { Map<String, String> map = new HashMap<>(); Set<String> seen = new HashSet<>(); int numTerms = atLeast(50); boolean ignoreCase = random().nextBoolean(); for (int i = 0; i < numTerms; i++) { String randomRealisticUnicodeString = TestUtil.randomRealisticUnicodeString(random()); char[] charArray = randomRealisticUnicodeString.toCharArray(); StringBuilder builder = new StringBuilder(); for (int j = 0; j < charArray.length; ) { int cp = Character.codePointAt(charArray, j, charArray.length); if (!Character.isWhitespace(cp)) { builder.appendCodePoint(cp); } j += Character.charCount(cp); } if (builder.length() > 0) { String inputValue = builder.toString(); // Make sure we don't try to add two inputs that vary only by case: String seenInputValue; if (ignoreCase) { // TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)??? char[] buffer = inputValue.toCharArray(); CharacterUtils.toLowerCase(buffer, 0, buffer.length); seenInputValue = buffer.toString(); } else { seenInputValue = inputValue; } if (seen.contains(seenInputValue) == false) { seen.add(seenInputValue); String value = TestUtil.randomSimpleString(random()); map.put(inputValue, value.isEmpty() ? 
"a" : value); } } } if (map.isEmpty()) { map.put("booked", "books"); } StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase); Set<Entry<String, String>> entrySet = map.entrySet(); StringBuilder input = new StringBuilder(); List<String> output = new ArrayList<>(); for (Entry<String, String> entry : entrySet) { builder.add(entry.getKey(), entry.getValue()); if (random().nextBoolean() || output.isEmpty()) { input.append(entry.getKey()).append(" "); output.add(entry.getValue()); } } Tokenizer tokenizer = new WhitespaceTokenizer(); tokenizer.setReader(new StringReader(input.toString())); TokenStream stream = new PorterStemFilter(new StemmerOverrideFilter(tokenizer, builder.build())); assertTokenStreamContents(stream, output.toArray(new String[0])); }
/**
 * Returns the weighted length of a string: characters in the range 0-255 count as one, every
 * other character counts as two.
 *
 * <p>The string is walked by code point, so a character stored as a surrogate pair is counted
 * once (as two) rather than once per surrogate.
 *
 * @param s the string to measure, may be {@code null}
 * @return the weighted length, {@code 0} for {@code null} or empty input
 */
public static int getWordCount(String s) {
  if (s == null) {
    return 0;
  }
  int length = 0;
  for (int i = 0; i < s.length(); ) {
    int codePoint = Character.codePointAt(s, i);
    if (codePoint >= 0 && codePoint <= 255) {
      length++;
    } else {
      length += 2;
    }
    // Step over the whole code point; stepping one char would visit the low surrogate of a pair
    // as a second, separate character.
    i += Character.charCount(codePoint);
  }
  return length;
}
public void setBatchInputWord(final String word) { reset(); mIsBatchMode = true; final int length = word.length(); for (int i = 0; i < length; i = Character.offsetByCodePoints(word, i, 1)) { final int codePoint = Character.codePointAt(word, i); // We don't want to override the batch input points that are held in mInputPointers // (See {@link #add(int,int,int)}). final Event processedEvent = processEvent(Event.createEventForCodePointFromUnknownSource(codePoint)); applyProcessedEvent(processedEvent); } }
/**
 * Computes the smallest range that covers every code point of {@code text}.
 *
 * @param text input text
 * @return the range from the lowest to the highest code point in {@code text}, or the
 *     all-inclusive range ({@code Integer.MIN_VALUE} to {@code Integer.MAX_VALUE}) if {@code
 *     text} is empty
 */
public static Range createTextRange(final CharSequence text) {
  final int length = text.length();
  if (length == 0) {
    return new Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
  }

  int lowest = Integer.MAX_VALUE;
  int highest = Integer.MIN_VALUE;
  int pos = 0;
  while (pos < length) {
    final int codePoint = Character.codePointAt(text, pos);
    if (codePoint < lowest) {
      lowest = codePoint;
    }
    if (codePoint > highest) {
      highest = codePoint;
    }
    pos += Character.charCount(codePoint);
  }
  return new Range(lowest, highest);
}
public boolean containsAnyCodePoint(final CharSequence text, final Range inputRange) { final int len = text.length(); if (len == 0) { return false; } if (mostSignificantGap != null && mostSignificantGap.contains(inputRange.first) && mostSignificantGap.contains(inputRange.last)) { return false; } // if found, returns the index, otherwise "-insertionPoint - 1" final int idxEnd = Arrays.binarySearch(ranges, new Range(inputRange.last), CONTAINS_COMPARATOR); // search for start in "head" range only (likely small) final int startFromIdx = 0; final int startEndIdx = idxEnd >= 0 ? idxEnd + 1 : -(idxEnd + 1); final int idxStart = Arrays.binarySearch( ranges, startFromIdx, startEndIdx, new Range(inputRange.first), CONTAINS_COMPARATOR); // If whole range in text outside same non-contained range, won't be found // If whole range in text inside single contained range, must match if (idxStart == idxEnd) { return idxStart >= 0; } // if start or end inside contained range, match if (idxStart >= 0 || idxEnd >= 0) { return true; } // text spans across multiple ranges of set, need to search individual chars final int searchStart = -idxStart + 1; final int searchEnd = -idxEnd + 1; for (int i = 0; i < len; ) { final int cp = Character.codePointAt(text, i); i += Character.charCount(cp); final int idx = Arrays.binarySearch(ranges, searchStart, searchEnd, new Range(cp), CONTAINS_COMPARATOR); if (idx > 0) { return true; } } return false; }
/** * Escape input chars to be shown in html. * * @param input * @return */ public static String escapeHtml(String input) { StringBuilder builder = new StringBuilder(); int pos = 0; int len = input.length(); while (pos < len) { int codePoint = Character.codePointAt(input, pos); if (codePoint == '"') builder.append("""); else if (codePoint == '&') builder.append("&"); else if (codePoint == '<') builder.append("<"); else if (codePoint == '>') builder.append(">"); else if (codePoint == '\n') builder.append("<br />"); else if (codePoint >= 0 && codePoint < 160) builder.append(Character.toChars(codePoint)); else builder.append("&#").append(codePoint).append(';'); pos += Character.charCount(codePoint); } return builder.toString(); }
/**
 * Refines a CJK guess by scanning {@code name} from {@code offset} for a letter that gives the
 * language away.
 *
 * @param name the name being classified
 * @param offset char index at which scanning starts
 * @return {@code FullNameStyle.JAPANESE} or {@code FullNameStyle.KOREAN} for the first letter
 *     found in a Japanese phonetic or Korean block, otherwise {@code FullNameStyle.CJK}
 */
private int guessCJKNameStyle(String name, int offset) {
  final int end = name.length();
  for (int pos = offset; pos < end; ) {
    final int cp = Character.codePointAt(name, pos);
    pos += Character.charCount(cp);
    if (!Character.isLetter(cp)) {
      continue;
    }
    final UnicodeBlock block = UnicodeBlock.of(cp);
    if (isJapanesePhoneticUnicodeBlock(block)) {
      return FullNameStyle.JAPANESE;
    }
    if (isKoreanUnicodeBlock(block)) {
      return FullNameStyle.KOREAN;
    }
  }
  return FullNameStyle.CJK;
}
/** * Copies the codepoints in a CharSequence to an int array. * * <p>This method assumes there is enough space in the array to store the code points. The size * can be measured with Character#codePointCount(CharSequence, int, int) before passing to this * method. If the int array is too small, an ArrayIndexOutOfBoundsException will be thrown. Also, * this method makes no effort to be thread-safe. Do not modify the CharSequence while this method * is running, or the behavior is undefined. This method can optionally downcase code points * before copying them, but it pays no attention to locale while doing so. * * @param destination the int array. * @param charSequence the CharSequence. * @param startIndex the start index inside the string in java chars, inclusive. * @param endIndex the end index inside the string in java chars, exclusive. * @param downCase if this is true, code points will be downcased before being copied. * @return the number of copied code points. */ public static int copyCodePointsAndReturnCodePointCount( final int[] destination, final CharSequence charSequence, final int startIndex, final int endIndex, final boolean downCase) { int destIndex = 0; for (int index = startIndex; index < endIndex; index = Character.offsetByCodePoints(charSequence, index, 1)) { final int codePoint = Character.codePointAt(charSequence, index); // TODO: stop using this, as it's not aware of the locale and does not always do // the right thing. destination[destIndex] = downCase ? Character.toLowerCase(codePoint) : codePoint; destIndex++; } return destIndex; }
/**
 * Replaces every supplementary character (a code point stored as a surrogate pair) with a single
 * {@code ?}. All other chars, including unpaired surrogates, are copied unchanged.
 *
 * @param text the text to filter, may be {@code null}
 * @return the filtered text, or {@code null} if {@code text} is {@code null}
 */
public static String replaceSupplementaryCharacters(final String text) {
  if (text == null) {
    return null;
  }

  final StringBuilder filtered = new StringBuilder();
  final int end = text.length();
  int pos = 0;
  while (pos < end) {
    final int codePoint = Character.codePointAt(text, pos);
    if (Character.isSupplementaryCodePoint(codePoint)) {
      filtered.append("?");
      // skip both halves of the surrogate pair
      pos += 2;
    } else {
      filtered.append(text.charAt(pos));
      pos += 1;
    }
  }
  return filtered.toString();
}
/** * Gets the index of the longest NCName that is the suffix of a character sequence. * * @param s The character sequence. * @return The index of the longest suffix of the specified character sequence {@code s} that is * an NCName, or -1 if the character sequence {@code s} does not have a suffix that is an * NCName. */ public static int getNCNameSuffixIndex(CharSequence s) { // identify bnode labels and do not try to split them if (s.length() > 1 && s.charAt(0) == '_' && s.charAt(1) == ':') { return -1; } int index = -1; for (int i = s.length() - 1; i > -1; i--) { if (!Character.isLowSurrogate(s.charAt(i))) { int codePoint = Character.codePointAt(s, i); if (isNCNameStartChar(codePoint)) { index = i; } if (!isNCNameChar(codePoint)) { break; } } } return index; }
/**
 * Appends {@code plainText} to {@code out}, replacing control characters and characters that
 * must be escaped inside a JavaScript string with escape sequences.
 *
 * <p>Stretches of text that need no escaping are copied in one go.
 *
 * @param out receives the escaped text
 * @param plainText the text to escape
 */
private void escape(StringBuilder out, CharSequence plainText) {
  final int length = plainText.length();
  int copiedUpTo = 0;
  int index = 0;
  while (index < length) {
    final int cp = Character.codePointAt(plainText, index);
    final int width = Character.charCount(cp);

    if (isControlCharacter(cp) || mustEscapeCharInJsString(cp)) {
      // Flush the untouched text in front of this character, then write its escape.
      out.append(plainText, copiedUpTo, index);
      copiedUpTo = index + width;
      switch (cp) {
        case '\b':
          out.append("\\b");
          break;
        case '\t':
          out.append("\\t");
          break;
        case '\n':
          out.append("\\n");
          break;
        case '\f':
          out.append("\\f");
          break;
        case '\r':
          out.append("\\r");
          break;
        case '\\':
          out.append("\\\\");
          break;
        case '/':
          out.append("\\/");
          break;
        case '"':
          out.append("\\\"");
          break;
        default:
          appendHexJavaScriptRepresentation(out, cp);
          break;
      }
    }
    index += width;
  }
  out.append(plainText, copiedUpTo, length);
}
/**
 * Measures the horizontal position of a character edge on this line.
 *
 * @param offset the line-relative character offset, between 0 and the line length, inclusive
 * @param trailing true to measure the trailing edge of the character before offset, false to
 *     measure the leading edge of the character at offset.
 * @param fmi receives metrics information about the requested character, can be null.
 * @return the signed offset from the leading margin to the requested character edge.
 */
float measure(int offset, boolean trailing, FontMetricsInt fmi) {
  final int target = trailing ? offset - 1 : offset;
  if (target < 0) {
    return 0;
  }

  // A tab-free line in a single direction is one run: measure it directly.
  if (!mHasTabs) {
    if (mDirections == Layout.DIRS_ALL_LEFT_TO_RIGHT) {
      return measureRun(0, offset, mLen, false, fmi);
    }
    if (mDirections == Layout.DIRS_ALL_RIGHT_TO_LEFT) {
      return measureRun(0, offset, mLen, true, fmi);
    }
  }

  float position = 0;
  final char[] text = mChars;
  final int[] runInfo = mDirections.mDirections;
  for (int r = 0; r < runInfo.length; r += 2) {
    final int runStart = runInfo[r];
    final int runFlags = runInfo[r + 1];
    int runLimit = runStart + (runFlags & Layout.RUN_LENGTH_MASK);
    if (runLimit > mLen) {
      runLimit = mLen;
    }
    final boolean rtlRun = (runFlags & Layout.RUN_RTL_FLAG) != 0;

    int segmentStart = runStart;
    // Without tabs the run is a single segment, so jump straight to its end.
    for (int k = mHasTabs ? runStart : runLimit; k <= runLimit; k++) {
      int codePoint = 0;
      Bitmap emoji = null;

      if (mHasTabs && k < runLimit) {
        codePoint = text[k];
        final boolean highSurrogate = codePoint >= 0xd800 && codePoint < 0xdc00;
        if (highSurrogate && k + 1 < runLimit) {
          codePoint = Character.codePointAt(text, k);
          if (codePoint >= Layout.MIN_EMOJI && codePoint <= Layout.MAX_EMOJI) {
            emoji = Layout.EMOJI_FACTORY.getBitmapFromAndroidPua(codePoint);
          } else if (codePoint > 0xffff) {
            // Ordinary supplementary character: step over its second char as well.
            k++;
            continue;
          }
        }
      }

      // Only a run end, a tab or an emoji bitmap closes the current segment.
      if (k != runLimit && codePoint != '\t' && emoji == null) {
        continue;
      }

      final boolean targetInSegment = target >= segmentStart && target < k;
      final boolean sameDirection = (mDir == Layout.DIR_RIGHT_TO_LEFT) == rtlRun;
      if (targetInSegment && sameDirection) {
        return position + measureRun(segmentStart, offset, k, rtlRun, fmi);
      }

      final float segmentWidth = measureRun(segmentStart, k, k, rtlRun, fmi);
      if (sameDirection) {
        position += segmentWidth;
      } else {
        position -= segmentWidth;
      }

      if (targetInSegment) {
        return position + measureRun(segmentStart, offset, k, rtlRun, null);
      }

      if (codePoint == '\t') {
        if (offset == k) {
          return position;
        }
        position = mDir * nextTab(position * mDir);
        if (target == k) {
          return position;
        }
      }

      if (emoji != null) {
        final float bmAscent = ascent(k);
        final float emojiWidth = emoji.getWidth() * -bmAscent / emoji.getHeight();
        position += mDir * emojiWidth;
        k++;
      }

      segmentStart = k + 1;
    }
  }

  return position;
}
/**
 * Draws this line on a canvas.
 *
 * @param c the canvas to render on
 * @param x the leading margin position
 * @param top the top of the line
 * @param y the baseline
 * @param bottom the bottom of the line
 */
void draw(Canvas c, float x, int top, int y, int bottom) {
  // A tab-free line in a single direction is one run: draw it directly.
  if (!mHasTabs) {
    if (mDirections == Layout.DIRS_ALL_LEFT_TO_RIGHT) {
      drawRun(c, 0, mLen, false, x, top, y, bottom, false);
      return;
    }
    if (mDirections == Layout.DIRS_ALL_RIGHT_TO_LEFT) {
      drawRun(c, 0, mLen, true, x, top, y, bottom, false);
      return;
    }
  }

  float position = 0;
  final int[] runInfo = mDirections.mDirections;
  RectF emojiBounds = null;

  final int lastRunIndex = runInfo.length - 2;
  for (int r = 0; r < runInfo.length; r += 2) {
    final int runStart = runInfo[r];
    final int runFlags = runInfo[r + 1];
    int runLimit = runStart + (runFlags & Layout.RUN_LENGTH_MASK);
    if (runLimit > mLen) {
      runLimit = mLen;
    }
    final boolean rtlRun = (runFlags & Layout.RUN_RTL_FLAG) != 0;

    int segmentStart = runStart;
    // Without tabs the run is a single segment, so jump straight to its end.
    for (int k = mHasTabs ? runStart : runLimit; k <= runLimit; k++) {
      int codePoint = 0;
      Bitmap emoji = null;

      if (mHasTabs && k < runLimit) {
        codePoint = mChars[k];
        final boolean highSurrogate = codePoint >= 0xd800 && codePoint < 0xdc00;
        if (highSurrogate && k + 1 < runLimit) {
          codePoint = Character.codePointAt(mChars, k);
          if (codePoint >= Layout.MIN_EMOJI && codePoint <= Layout.MAX_EMOJI) {
            emoji = Layout.EMOJI_FACTORY.getBitmapFromAndroidPua(codePoint);
          } else if (codePoint > 0xffff) {
            // Ordinary supplementary character: step over its second char as well.
            k++;
            continue;
          }
        }
      }

      // Only a run end, a tab or an emoji bitmap closes the current segment.
      if (k != runLimit && codePoint != '\t' && emoji == null) {
        continue;
      }

      final boolean moreToDraw = r != lastRunIndex || k != mLen;
      final float drawn =
          drawRun(c, segmentStart, k, rtlRun, x + position, top, y, bottom, moreToDraw);
      position += drawn;

      if (codePoint == '\t') {
        position = mDir * nextTab(position * mDir);
      } else if (emoji != null) {
        // Scale the bitmap so that its height matches the ascent at this position.
        final float bmAscent = ascent(k);
        final float bitmapHeight = emoji.getHeight();
        final float scale = -bmAscent / bitmapHeight;
        final float width = emoji.getWidth() * scale;

        if (emojiBounds == null) {
          emojiBounds = new RectF();
        }
        emojiBounds.set(x + position, y + bmAscent, x + position + width, y);
        c.drawBitmap(emoji, null, emojiBounds, mPaint);
        position += width;
        k++;
      }

      segmentStart = k + 1;
    }
  }
}
/**
 * Resumes scanning until the next regular expression is matched, the end of input is encountered
 * or an I/O-Error occurs.
 *
 * <p>Each pass of the outer loop first updates {@code yyline}/{@code yycolumn} for the text
 * matched by the previous pass, then runs the transition tables over the input to find the next
 * match, and finally executes the action selected for that match. Actions that do not return
 * (actions 1 and 10) let the loop continue with the next match.
 *
 * @return the next token
 * @exception java.io.IOException if any I/O-Error occurs
 */
public java_cup.runtime.Symbol next_token() throws java.io.IOException {
  int zzInput;
  int zzAction;

  // cached fields:
  int zzCurrentPosL;
  int zzMarkedPosL;
  int zzEndReadL = zzEndRead;
  char[] zzBufferL = zzBuffer;
  char[] zzCMapL = ZZ_CMAP;

  int[] zzTransL = ZZ_TRANS;
  int[] zzRowMapL = ZZ_ROWMAP;
  int[] zzAttrL = ZZ_ATTRIBUTE;

  while (true) {
    zzMarkedPosL = zzMarkedPos;

    // Walk the previously matched text [zzStartRead, zzMarkedPos) to keep the line and column
    // counters current. zzR remembers that the last character seen was '\r', so that a
    // following '\n' is not counted as a second line break.
    boolean zzR = false;
    int zzCh;
    int zzCharCount;
    for (zzCurrentPosL = zzStartRead;
        zzCurrentPosL < zzMarkedPosL;
        zzCurrentPosL += zzCharCount) {
      zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);
      zzCharCount = Character.charCount(zzCh);
      switch (zzCh) {
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          yyline++;
          yycolumn = 0;
          zzR = false;
          break;
        case '\r':
          yyline++;
          yycolumn = 0;
          zzR = true;
          break;
        case '\n':
          if (zzR) zzR = false;
          else {
            yyline++;
            yycolumn = 0;
          }
          break;
        default:
          zzR = false;
          yycolumn += zzCharCount;
      }
    }

    if (zzR) {
      // peek one character ahead if it is \n (if we have counted one line too much)
      boolean zzPeek;
      if (zzMarkedPosL < zzEndReadL) zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      else if (zzAtEOF) zzPeek = false;
      else {
        // The buffer is exhausted: refill it and re-read the cached fields it may have changed.
        boolean eof = zzRefill();
        zzEndReadL = zzEndRead;
        zzMarkedPosL = zzMarkedPos;
        zzBufferL = zzBuffer;
        if (eof) zzPeek = false;
        else zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      }
      if (zzPeek) yyline--;
    }
    zzAction = -1;

    // The new match starts where the previous one ended.
    zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

    zzState = ZZ_LEXSTATE[zzLexicalState];

    // set up zzAction for empty match case:
    int zzAttributes = zzAttrL[zzState];
    if ((zzAttributes & 1) == 1) {
      zzAction = zzState;
    }

    zzForAction:
    {
      while (true) {

        // Fetch the next code point, refilling the buffer when it runs out.
        if (zzCurrentPosL < zzEndReadL) {
          zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
          zzCurrentPosL += Character.charCount(zzInput);
        } else if (zzAtEOF) {
          zzInput = YYEOF;
          break zzForAction;
        } else {
          // store back cached positions
          zzCurrentPos = zzCurrentPosL;
          zzMarkedPos = zzMarkedPosL;
          boolean eof = zzRefill();
          // get translated positions and possibly new buffer
          zzCurrentPosL = zzCurrentPos;
          zzMarkedPosL = zzMarkedPos;
          zzBufferL = zzBuffer;
          zzEndReadL = zzEndRead;
          if (eof) {
            zzInput = YYEOF;
            break zzForAction;
          } else {
            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
            zzCurrentPosL += Character.charCount(zzInput);
          }
        }
        // Table lookup for the next state; -1 means there is no transition on this input.
        int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
        if (zzNext == -1) break zzForAction;
        zzState = zzNext;

        // Attribute bit 0: remember this state as the action of the match so far and mark the
        // position. Attribute bit 3: stop scanning at this state.
        zzAttributes = zzAttrL[zzState];
        if ((zzAttributes & 1) == 1) {
          zzAction = zzState;
          zzMarkedPosL = zzCurrentPosL;
          if ((zzAttributes & 8) == 8) break zzForAction;
        }
      }
    }

    // store back cached position
    zzMarkedPos = zzMarkedPosL;

    // Each action block is followed by an empty case that only breaks; an action that does not
    // return falls through into it and the outer loop scans the next token.
    switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
      case 1:
        {
          System.out.println(
              "Token No Valido <"
                  + yytext()
                  + ">linea: "
                  + (yyline + 1)
                  + " columna: "
                  + (yycolumn + 1));
        }
      case 11:
        break;
      case 2:
        {
          return symbol(sym.NUM, new Integer(yytext()));
        }
      case 12:
        break;
      case 3:
        {
          return symbol(sym.MAS);
        }
      case 13:
        break;
      case 4:
        {
          return symbol(sym.MENOS);
        }
      case 14:
        break;
      case 5:
        {
          return symbol(sym.PYC);
        }
      case 15:
        break;
      case 6:
        {
          return symbol(sym.PARIZQ);
        }
      case 16:
        break;
      case 7:
        {
          return symbol(sym.PARDER);
        }
      case 17:
        break;
      case 8:
        {
          return symbol(sym.DIV);
        }
      case 18:
        break;
      case 9:
        {
          return symbol(sym.MUL);
        }
      case 19:
        break;
      case 10:
        {
        }
      case 20:
        break;
      default:
        // No action matched: either the input is exhausted or nothing matches at this position.
        if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
          zzAtEOF = true;
          zzDoEOF();
          {
            return new java_cup.runtime.Symbol(sym.EOF);
          }
        } else {
          zzScanError(ZZ_NO_MATCH);
        }
    }
  }
}
/**
 * Resumes scanning until the next regular expression is matched, the end of input is encountered
 * or an I/O-Error occurs.
 *
 * <p>Each pass of the outer loop first updates {@code yyline}/{@code yycolumn} for the text
 * matched by the previous pass, then runs the transition tables over the input to find the next
 * match, and finally executes the action selected for that match. None of the actions visible
 * here returns a value: they print the token (or ignore it) and the loop continues, so the
 * method only returns at end of input.
 *
 * @return {@code YYEOF} once the end of input is reached
 * @exception java.io.IOException if any I/O-Error occurs
 */
public int yylex() throws java.io.IOException {
  int zzInput;
  int zzAction;

  // cached fields:
  int zzCurrentPosL;
  int zzMarkedPosL;
  int zzEndReadL = zzEndRead;
  char[] zzBufferL = zzBuffer;
  char[] zzCMapL = ZZ_CMAP;

  int[] zzTransL = ZZ_TRANS;
  int[] zzRowMapL = ZZ_ROWMAP;
  int[] zzAttrL = ZZ_ATTRIBUTE;

  while (true) {
    zzMarkedPosL = zzMarkedPos;

    // Walk the previously matched text [zzStartRead, zzMarkedPos) to keep the line and column
    // counters current. zzR remembers that the last character seen was '\r', so that a
    // following '\n' is not counted as a second line break.
    boolean zzR = false;
    int zzCh;
    int zzCharCount;
    for (zzCurrentPosL = zzStartRead;
        zzCurrentPosL < zzMarkedPosL;
        zzCurrentPosL += zzCharCount) {
      zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);
      zzCharCount = Character.charCount(zzCh);
      switch (zzCh) {
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          yyline++;
          yycolumn = 0;
          zzR = false;
          break;
        case '\r':
          yyline++;
          yycolumn = 0;
          zzR = true;
          break;
        case '\n':
          if (zzR) zzR = false;
          else {
            yyline++;
            yycolumn = 0;
          }
          break;
        default:
          zzR = false;
          yycolumn += zzCharCount;
      }
    }

    if (zzR) {
      // peek one character ahead if it is \n (if we have counted one line too much)
      boolean zzPeek;
      if (zzMarkedPosL < zzEndReadL) zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      else if (zzAtEOF) zzPeek = false;
      else {
        // The buffer is exhausted: refill it and re-read the cached fields it may have changed.
        boolean eof = zzRefill();
        zzEndReadL = zzEndRead;
        zzMarkedPosL = zzMarkedPos;
        zzBufferL = zzBuffer;
        if (eof) zzPeek = false;
        else zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      }
      if (zzPeek) yyline--;
    }
    zzAction = -1;

    // The new match starts where the previous one ended.
    zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

    zzState = ZZ_LEXSTATE[zzLexicalState];

    // set up zzAction for empty match case:
    int zzAttributes = zzAttrL[zzState];
    if ((zzAttributes & 1) == 1) {
      zzAction = zzState;
    }

    zzForAction:
    {
      while (true) {

        // Fetch the next code point, refilling the buffer when it runs out.
        if (zzCurrentPosL < zzEndReadL) {
          zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
          zzCurrentPosL += Character.charCount(zzInput);
        } else if (zzAtEOF) {
          zzInput = YYEOF;
          break zzForAction;
        } else {
          // store back cached positions
          zzCurrentPos = zzCurrentPosL;
          zzMarkedPos = zzMarkedPosL;
          boolean eof = zzRefill();
          // get translated positions and possibly new buffer
          zzCurrentPosL = zzCurrentPos;
          zzMarkedPosL = zzMarkedPos;
          zzBufferL = zzBuffer;
          zzEndReadL = zzEndRead;
          if (eof) {
            zzInput = YYEOF;
            break zzForAction;
          } else {
            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
            zzCurrentPosL += Character.charCount(zzInput);
          }
        }
        // Table lookup for the next state; -1 means there is no transition on this input.
        int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
        if (zzNext == -1) break zzForAction;
        zzState = zzNext;

        // Attribute bit 0: remember this state as the action of the match so far and mark the
        // position. Attribute bit 3: stop scanning at this state.
        zzAttributes = zzAttrL[zzState];
        if ((zzAttributes & 1) == 1) {
          zzAction = zzState;
          zzMarkedPosL = zzCurrentPosL;
          if ((zzAttributes & 8) == 8) break zzForAction;
        }
      }
    }

    // store back cached position
    zzMarkedPos = zzMarkedPosL;

    if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
      zzAtEOF = true;
      return YYEOF;
    } else {
      // Each action block is followed by an empty case that only breaks; the action falls
      // through into it and the outer loop scans the next token.
      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
        case 1:
          {
            System.out.print(yytext());
          }
        case 11:
          break;
        case 2:
          {
            System.out.println("Token " + new Identifier(yytext().toString()));
          }
        case 12:
          break;
        case 3:
          {
            System.out.println("Token " + new Int(Integer.parseInt(yytext())).toString());
          }
        case 13:
          break;
        case 4:
          {
            System.out.println("Token " + new Delimiter(yytext().toString()));
          }
        case 14:
          break;
        case 5:
          {
            /* Ignore whitespace. */
          }
        case 15:
          break;
        case 6:
          {
            System.out.println("Token " + new Operator(yytext().toString()));
          }
        case 16:
          break;
        case 7:
          {
            System.out.println(
                "Illegal Token '" + yytext() + "' line: " + yyline + ", column: " + yycolumn);
          }
        case 17:
          break;
        case 8:
          {
            System.out.println("Token " + new Keyword(yytext().toString()));
          }
        case 18:
          break;
        case 9:
          {
            System.out.println("Token " + new Float(Double.parseDouble(yytext())).toString());
          }
        case 19:
          break;
        case 10:
          {
            /* Ignore comment */
          }
        case 20:
          break;
        default:
          zzScanError(ZZ_NO_MATCH);
      }
    }
  }
}
private void printString(String o) throws IOException { append('\"'); char[] chars = o.toCharArray(); for (int i = 0; i < chars.length; i++) { char ch = chars[i]; switch (ch) { case '\"': append('\\'); append('\"'); break; case '>': append('\\'); append('>'); break; case '<': append('\\'); append('<'); break; case '\'': append('\\'); append('\''); break; case '\\': append('\\'); append('\\'); break; case '\n': append('\\'); append('n'); break; case '\r': append('\\'); append('r'); break; case '\t': append('\\'); append('t'); break; case ' ': // needed because other space chars will be escaped in the default branch append(' '); break; default: int cp = Character.codePointAt(chars, i); if (Character.isSpaceChar(cp) || Character.isISOControl(cp) || Character.UnicodeBlock.SPECIALS.equals(Character.UnicodeBlock.of(cp))) { // these characters are invisible or otherwise unreadable and we escape them here // for clarity of the serialized string if (cp <= Byte.MAX_VALUE) { append("\\a" + String.format("%02x", (int) ch)); } else if (cp <= Character.MAX_VALUE) { append("\\u" + String.format("%04x", (int) ch)); } else { append("\\U" + String.format("%06x", (int) ch)); } if (Character.isHighSurrogate(ch)) { i++; // skip the next char } } else { append(ch); if (Character.isHighSurrogate(ch) && i + 1 < chars.length) { append(chars[++i]); } } } } append('\"'); }
/**
 * Resumes scanning until the next regular expression is matched, the end of input is encountered
 * or an I/O-Error occurs.
 *
 * <p>Each iteration of the outer loop first advances {@code yychar} and updates
 * {@code yyline}/{@code yycolumn} for the text of the previous match, then runs the table-driven
 * automaton from the current lexical state, and finally dispatches to the action for the match.
 * Actions that do not return (for example those that only append to the {@code string} buffer or
 * switch the lexical state) let the loop continue with the next match.
 *
 * @return the next token
 * @exception java.io.IOException if any I/O-Error occurs
 * @exception MethodInfoParseException thrown by the actions for an unterminated string and for
 *     an illegal escape sequence
 */
public ParsedSymbol yylex() throws java.io.IOException, MethodInfoParseException {
  int zzInput;
  int zzAction;

  // cached fields:
  int zzCurrentPosL;
  int zzMarkedPosL;
  int zzEndReadL = zzEndRead;
  char[] zzBufferL = zzBuffer;
  char[] zzCMapL = ZZ_CMAP;

  int[] zzTransL = ZZ_TRANS;
  int[] zzRowMapL = ZZ_ROWMAP;
  int[] zzAttrL = ZZ_ATTRIBUTE;

  while (true) {
    zzMarkedPosL = zzMarkedPos;

    // Advance the running char offset by the length (in chars) of the previous match.
    yychar += zzMarkedPosL - zzStartRead;

    // Line/column bookkeeping: walk the code points between zzStartRead and the marked position.
    // zzR remembers that the last code point seen was '\r', so that a directly following '\n'
    // is not counted as a second line break.
    boolean zzR = false;
    int zzCh;
    int zzCharCount;
    for (zzCurrentPosL = zzStartRead;
        zzCurrentPosL < zzMarkedPosL;
        zzCurrentPosL += zzCharCount) {
      zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);
      zzCharCount = Character.charCount(zzCh);
      switch (zzCh) {
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          yyline++;
          yycolumn = 0;
          zzR = false;
          break;
        case '\r':
          yyline++;
          yycolumn = 0;
          zzR = true;
          break;
        case '\n':
          if (zzR) {
            zzR = false;
          } else {
            yyline++;
            yycolumn = 0;
          }
          break;
        default:
          zzR = false;
          // The column advances by the number of chars (1 or 2) the code point occupies.
          yycolumn += zzCharCount;
      }
    }

    if (zzR) {
      // peek one character ahead if it is \n (if we have counted one line too much)
      boolean zzPeek;
      if (zzMarkedPosL < zzEndReadL) {
        zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      } else if (zzAtEOF) {
        zzPeek = false;
      } else {
        // Nothing buffered past the mark: refill, then re-read the fields zzRefill may have
        // changed before peeking.
        boolean eof = zzRefill();
        zzEndReadL = zzEndRead;
        zzMarkedPosL = zzMarkedPos;
        zzBufferL = zzBuffer;
        if (eof) {
          zzPeek = false;
        } else {
          zzPeek = zzBufferL[zzMarkedPosL] == '\n';
        }
      }
      if (zzPeek) {
        yyline--;
      }
    }
    // -1 means "no action found yet"; it stays negative if nothing matches.
    zzAction = -1;

    // The next match starts where the previous one ended.
    zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

    zzState = ZZ_LEXSTATE[zzLexicalState];

    // set up zzAction for empty match case:
    int zzAttributes = zzAttrL[zzState];
    if ((zzAttributes & 1) == 1) {
      zzAction = zzState;
    }

    zzForAction:
    {
      while (true) {

        // Fetch the next input code point, refilling the buffer when it is exhausted.
        if (zzCurrentPosL < zzEndReadL) {
          zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
          zzCurrentPosL += Character.charCount(zzInput);
        } else if (zzAtEOF) {
          zzInput = YYEOF;
          break zzForAction;
        } else {
          // store back cached positions
          zzCurrentPos = zzCurrentPosL;
          zzMarkedPos = zzMarkedPosL;
          boolean eof = zzRefill();
          // get translated positions and possibly new buffer
          zzCurrentPosL = zzCurrentPos;
          zzMarkedPosL = zzMarkedPos;
          zzBufferL = zzBuffer;
          zzEndReadL = zzEndRead;
          if (eof) {
            zzInput = YYEOF;
            break zzForAction;
          } else {
            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
            zzCurrentPosL += Character.charCount(zzInput);
          }
        }
        // Transition lookup: row offset of the current state plus the mapped value of the input.
        // A result of -1 means there is no transition, so scanning for this token stops.
        int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
        if (zzNext == -1) {
          break zzForAction;
        }
        zzState = zzNext;

        // Attribute bit 1: remember this state as the action and mark the position.
        // Attribute bit 8: stop scanning immediately once such a state is reached.
        zzAttributes = zzAttrL[zzState];
        if ((zzAttributes & 1) == 1) {
          zzAction = zzState;
          zzMarkedPosL = zzCurrentPosL;
          if ((zzAttributes & 8) == 8) {
            break zzForAction;
          }
        }
      }
    }

    // store back cached position
    zzMarkedPos = zzMarkedPosL;

    // A negative zzAction (no match) selects the default branch. Each "case N: { ... }" block
    // that does not return or throw deliberately falls through into the "case M: break;" label
    // that follows it.
    switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
      case 1:
        {
          // No action: the matched text is skipped.
        }
      case 36:
        break;
      case 2:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_IDENTIFIER, yytext());
        }
      case 37:
        break;
      case 3:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_INTEGER, Long.parseLong((yytext())));
        }
      case 38:
        break;
      case 4:
        {
          // Start of a plain string: switch to the STRING state and reset the buffer.
          isMultiname = false;
          yybegin(STRING);
          string.setLength(0);
        }
      case 39:
        break;
      case 5:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_COLON);
        }
      case 40:
        break;
      case 6:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_COMMA);
        }
      case 41:
        break;
      case 7:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_STAR);
        }
      case 42:
        break;
      case 8:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_ASSIGN);
        }
      case 43:
        break;
      case 9:
        {
          // Matched text is copied into the string buffer unchanged.
          string.append(yytext());
        }
      case 44:
        break;
      case 10:
        {
          // yyline is zero-based here; the reported line number is one-based.
          throw new MethodInfoParseException("Unterminated string at end of line", yyline + 1);
        }
      case 45:
        break;
      case 11:
        {
          // End of the string (or multiname): return to the initial state and emit the symbol.
          yybegin(YYINITIAL);
          // length also includes the trailing quote
          if (isMultiname) {
            return new ParsedSymbol(ParsedSymbol.TYPE_MULTINAME, multinameId);
          } else {
            return new ParsedSymbol(ParsedSymbol.TYPE_STRING, string.toString());
          }
        }
      case 46:
        break;
      case 12:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_FLOAT, Double.parseDouble((yytext())));
        }
      case 47:
        break;
      case 13:
        {
          throw new MethodInfoParseException(
              "Illegal escape sequence \"" + yytext() + "\"", yyline + 1);
        }
      case 48:
        break;
      // Cases 14 to 21 each append one fixed character to the string buffer.
      case 14:
        {
          string.append('\n');
        }
      case 49:
        break;
      case 15:
        {
          string.append('\t');
        }
      case 50:
        break;
      case 16:
        {
          string.append('\b');
        }
      case 51:
        break;
      case 17:
        {
          string.append('\r');
        }
      case 52:
        break;
      case 18:
        {
          string.append('\\');
        }
      case 53:
        break;
      case 19:
        {
          string.append('\"');
        }
      case 54:
        break;
      case 20:
        {
          string.append('\f');
        }
      case 55:
        break;
      case 21:
        {
          string.append('\'');
        }
      case 56:
        break;
      case 22:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_DOTS);
        }
      case 57:
        break;
      case 23:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_NULL);
        }
      case 58:
        break;
      case 24:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_TRUE);
        }
      case 59:
        break;
      case 25:
        {
          // Drop the first two chars of the match, parse the rest as a hexadecimal number and
          // append it as a single char.
          char val = (char) Integer.parseInt(yytext().substring(2), 16);
          string.append(val);
        }
      case 60:
        break;
      case 26:
        {
          // Start of a multiname: the id is the matched text without its first two and last two
          // chars. Scanning then continues in the STRING state with an empty buffer.
          isMultiname = true;
          String s = yytext();
          multinameId = Long.parseLong(s.substring(2, s.length() - 2));
          yybegin(STRING);
          string.setLength(0);
        }
      case 61:
        break;
      case 27:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_FALSE);
        }
      case 62:
        break;
      case 28:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_STATIC);
        }
      case 63:
        break;
      case 29:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_PRIVATE);
        }
      case 64:
        break;
      case 30:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_PACKAGE);
        }
      case 65:
        break;
      case 31:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_INTERNAL);
        }
      case 66:
        break;
      case 32:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_EXPLICIT);
        }
      case 67:
        break;
      case 33:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_UNDEFINED);
        }
      case 68:
        break;
      case 34:
        {
          return new ParsedSymbol(ParsedSymbol.TYPE_PROTECTED);
        }
      case 69:
        break;
      case 35:
        {
          // The namespace number is the matched text without its first three and last two chars.
          String s = yytext();
          long ns = Long.parseLong(s.substring(3, s.length() - 2));
          return new ParsedSymbol(ParsedSymbol.TYPE_NAMESPACE, ns);
        }
      case 70:
        break;
      default:
        // End of input is reported only when it was hit without consuming anything for this
        // token; any other unmatched input is a scan error.
        if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
          zzAtEOF = true;
          {
            return new ParsedSymbol(ParsedSymbol.TYPE_EOF);
          }
        } else {
          zzScanError(ZZ_NO_MATCH);
        }
    }
  }
}
/** * Set an identifier to analyze. Afterwards, call methods like getScripts() * * @param identifier the identifier to analyze * @return self * @internal * @deprecated This API is ICU internal only. */ @Deprecated public IdentifierInfo setIdentifier(String identifier) { this.identifier = identifier; clear(); BitSet scriptsForCP = new BitSet(); int cp; for (int i = 0; i < identifier.length(); i += Character.charCount(i)) { cp = Character.codePointAt(identifier, i); // Store a representative character for each kind of decimal digit if (UCharacter.getType(cp) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) { // Just store the zero character as a representative for comparison. Unicode guarantees it // is cp - value numerics.add(cp - UCharacter.getNumericValue(cp)); } UScript.getScriptExtensions(cp, scriptsForCP); scriptsForCP.clear(UScript.COMMON); scriptsForCP.clear(UScript.INHERITED); // if (temp.cardinality() == 0) { // // HACK for older version of ICU // requiredScripts.set(UScript.getScript(cp)); // } else switch (scriptsForCP.cardinality()) { case 0: break; case 1: // Single script, record it. requiredScripts.or(scriptsForCP); break; default: if (!requiredScripts.intersects(scriptsForCP) && scriptSetSet.add(scriptsForCP)) { scriptsForCP = new BitSet(); } break; } } // Now make a final pass through to remove alternates that came before singles. // [Kana], [Kana Hira] => [Kana] // This is relatively infrequent, so doesn't have to be optimized. // We also compute any commonalities among the alternates. if (scriptSetSet.size() > 0) { commonAmongAlternates.set(0, UScript.CODE_LIMIT); for (Iterator<BitSet> it = scriptSetSet.iterator(); it.hasNext(); ) { final BitSet next = it.next(); // [Kana], [Kana Hira] => [Kana] if (requiredScripts.intersects(next)) { it.remove(); } else { // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]] commonAmongAlternates.and(next); // get the intersection. 
for (BitSet other : scriptSetSet) { if (next != other && contains(next, other)) { it.remove(); break; } } } } } if (scriptSetSet.size() == 0) { commonAmongAlternates.clear(); } return this; }
/**
 * Resumes scanning until the next regular expression is matched, the end of input is encountered
 * or an I/O-Error occurs.
 *
 * <p>Each iteration of the outer loop first updates {@code yyline}/{@code yycolumn} for the text
 * of the previous match, then runs the table-driven automaton from the current lexical state,
 * and finally dispatches to the action for the match. Every action except the whitespace one
 * either returns a symbol or throws; the whitespace action lets the loop continue.
 *
 * @return the next token
 * @exception java.io.IOException if any I/O-Error occurs
 * @exception ScannerException thrown by the action for an unrecognized character
 */
public java_cup.runtime.Symbol next_token() throws java.io.IOException, ScannerException {
  int zzInput;
  int zzAction;

  // cached fields:
  int zzCurrentPosL;
  int zzMarkedPosL;
  int zzEndReadL = zzEndRead;
  char[] zzBufferL = zzBuffer;
  char[] zzCMapL = ZZ_CMAP;

  int[] zzTransL = ZZ_TRANS;
  int[] zzRowMapL = ZZ_ROWMAP;
  int[] zzAttrL = ZZ_ATTRIBUTE;

  while (true) {
    zzMarkedPosL = zzMarkedPos;

    // Line/column bookkeeping: walk the code points between zzStartRead and the marked position.
    // zzR remembers that the last code point seen was '\r', so that a directly following '\n'
    // is not counted as a second line break.
    boolean zzR = false;
    int zzCh;
    int zzCharCount;
    for (zzCurrentPosL = zzStartRead;
        zzCurrentPosL < zzMarkedPosL;
        zzCurrentPosL += zzCharCount) {
      zzCh = Character.codePointAt(zzBufferL, zzCurrentPosL, zzMarkedPosL);
      zzCharCount = Character.charCount(zzCh);
      switch (zzCh) {
        case '\u000B':
        case '\u000C':
        case '\u0085':
        case '\u2028':
        case '\u2029':
          yyline++;
          yycolumn = 0;
          zzR = false;
          break;
        case '\r':
          yyline++;
          yycolumn = 0;
          zzR = true;
          break;
        case '\n':
          if (zzR) zzR = false;
          else {
            yyline++;
            yycolumn = 0;
          }
          break;
        default:
          zzR = false;
          // The column advances by the number of chars (1 or 2) the code point occupies.
          yycolumn += zzCharCount;
      }
    }

    if (zzR) {
      // peek one character ahead if it is \n (if we have counted one line too much)
      boolean zzPeek;
      if (zzMarkedPosL < zzEndReadL) zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      else if (zzAtEOF) zzPeek = false;
      else {
        // Nothing buffered past the mark: refill, then re-read the fields zzRefill may have
        // changed before peeking.
        boolean eof = zzRefill();
        zzEndReadL = zzEndRead;
        zzMarkedPosL = zzMarkedPos;
        zzBufferL = zzBuffer;
        if (eof) zzPeek = false;
        else zzPeek = zzBufferL[zzMarkedPosL] == '\n';
      }
      if (zzPeek) yyline--;
    }
    // -1 means "no action found yet"; it stays negative if nothing matches.
    zzAction = -1;

    // The next match starts where the previous one ended.
    zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;

    zzState = ZZ_LEXSTATE[zzLexicalState];

    // set up zzAction for empty match case:
    int zzAttributes = zzAttrL[zzState];
    if ((zzAttributes & 1) == 1) {
      zzAction = zzState;
    }

    zzForAction:
    {
      while (true) {

        // Fetch the next input code point, refilling the buffer when it is exhausted.
        if (zzCurrentPosL < zzEndReadL) {
          zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
          zzCurrentPosL += Character.charCount(zzInput);
        } else if (zzAtEOF) {
          zzInput = YYEOF;
          break zzForAction;
        } else {
          // store back cached positions
          zzCurrentPos = zzCurrentPosL;
          zzMarkedPos = zzMarkedPosL;
          boolean eof = zzRefill();
          // get translated positions and possibly new buffer
          zzCurrentPosL = zzCurrentPos;
          zzMarkedPosL = zzMarkedPos;
          zzBufferL = zzBuffer;
          zzEndReadL = zzEndRead;
          if (eof) {
            zzInput = YYEOF;
            break zzForAction;
          } else {
            zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
            zzCurrentPosL += Character.charCount(zzInput);
          }
        }
        // Transition lookup: row offset of the current state plus the mapped value of the input.
        // A result of -1 means there is no transition, so scanning for this token stops.
        int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
        if (zzNext == -1) break zzForAction;
        zzState = zzNext;

        // Attribute bit 1: remember this state as the action and mark the position.
        // Attribute bit 8: stop scanning immediately once such a state is reached.
        zzAttributes = zzAttrL[zzState];
        if ((zzAttributes & 1) == 1) {
          zzAction = zzState;
          zzMarkedPosL = zzCurrentPosL;
          if ((zzAttributes & 8) == 8) break zzForAction;
        }
      }
    }

    // store back cached position
    zzMarkedPos = zzMarkedPosL;

    // A negative zzAction (no match) selects the default branch. The "case M: break;" label
    // after each action block is only reached by fall-through from an action that neither
    // returns nor throws.
    switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
      case 1:
        {
          // error: .|\n matches any character not recognized
          // by one of the preceding rules
          throw new ScannerException(
              "symbole inconnu, caractère "
                  + yytext()
                  + " ligne "
                  + yyline
                  + " colonne "
                  + yycolumn);
        }
      case 22:
        break;
      case 2:
        {
          /* whitespace is ignored */
        }
      case 23:
        break;
      case 3:
        {
          // The only symbol that carries the matched text as its value.
          return creerSymbole("VARIABLE", TypeSymboles.VARIABLE, yytext());
        }
      case 24:
        break;
      case 4:
        {
          return creerSymbole("TRUE", TypeSymboles.TRUE);
        }
      case 25:
        break;
      case 5:
        {
          return creerSymbole("FALSE", TypeSymboles.FALSE);
        }
      case 26:
        break;
      case 6:
        {
          return creerSymbole("NON", TypeSymboles.NON);
        }
      case 27:
        break;
      case 7:
        {
          return creerSymbole("OU", TypeSymboles.OU);
        }
      case 28:
        break;
      case 8:
        {
          return creerSymbole("ET", TypeSymboles.ET);
        }
      case 29:
        break;
      case 9:
        {
          return creerSymbole("PO", TypeSymboles.PO);
        }
      case 30:
        break;
      case 10:
        {
          return creerSymbole("PF", TypeSymboles.PF);
        }
      case 31:
        break;
      case 11:
        {
          return creerSymbole("CO", TypeSymboles.CO);
        }
      case 32:
        break;
      case 12:
        {
          return creerSymbole("CF", TypeSymboles.CF);
        }
      case 33:
        break;
      case 13:
        {
          return creerSymbole("VIR", TypeSymboles.VIR);
        }
      case 34:
        break;
      case 14:
        {
          return creerSymbole("POINT", TypeSymboles.POINT);
        }
      case 35:
        break;
      case 15:
        {
          return creerSymbole("IMPL", TypeSymboles.IMPL);
        }
      case 36:
        break;
      case 16:
        {
          return creerSymbole("AFF", TypeSymboles.AFF);
        }
      case 37:
        break;
      case 17:
        {
          return creerSymbole("VL", TypeSymboles.VL);
        }
      case 38:
        break;
      case 18:
        {
          return creerSymbole("EQUIV", TypeSymboles.EQUIV);
        }
      case 39:
        break;
      case 19:
        {
          return creerSymbole("FNC", TypeSymboles.FNC);
        }
      case 40:
        break;
      case 20:
        {
          return creerSymbole("DPLL", TypeSymboles.DPLL);
        }
      case 41:
        break;
      case 21:
        {
          return creerSymbole("TABLE", TypeSymboles.TABLE);
        }
      case 42:
        break;
      default:
        // End of input is reported only when it was hit without consuming anything for this
        // token; any other unmatched input is a scan error.
        if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
          zzAtEOF = true;
          {
            return new Symbole(TypeSymboles.EOF);
          }
        } else {
          zzScanError(ZZ_NO_MATCH);
        }
    }
  }
}