/** NOTE: The sourceX2 is exclusive. */ public void copyInterval(TerminalRow line, int sourceX1, int sourceX2, int destinationX) { final int x1 = line.findStartOfColumn(sourceX1); final int x2 = line.findStartOfColumn(sourceX2); boolean startingFromSecondHalfOfWideChar = (sourceX1 > 0 && line.wideDisplayCharacterStartingAt(sourceX1 - 1)); final char[] sourceChars = (this == line) ? Arrays.copyOf(line.mText, line.mText.length) : line.mText; int latestNonCombiningWidth = 0; for (int i = x1; i < x2; i++) { char sourceChar = sourceChars[i]; int codePoint = Character.isHighSurrogate(sourceChar) ? Character.toCodePoint(sourceChar, sourceChars[++i]) : sourceChar; if (startingFromSecondHalfOfWideChar) { // Just treat copying second half of wide char as copying whitespace. codePoint = ' '; startingFromSecondHalfOfWideChar = false; } int w = WcWidth.width(codePoint); if (w > 0) { destinationX += latestNonCombiningWidth; sourceX1 += latestNonCombiningWidth; latestNonCombiningWidth = w; } setChar(destinationX, codePoint, line.getStyle(sourceX1)); } }
/** * Returns the Unicode code point of the character at the given index. * * <p>Unlike {@link Character#codePointAt(CharSequence, int)} or {@link String#codePointAt(int)} * this method will never fail silently when encountering an invalid surrogate pair. * * <p>The behaviour of this method is as follows: * * <ol> * <li>If {@code index >= end}, {@link IndexOutOfBoundsException} is thrown. * <li><b>If the character at the specified index is not a surrogate, it is returned.</b> * <li>If the first character was a high surrogate value, then an attempt is made to read the * next character. * <ol> * <li><b>If the end of the sequence was reached, the negated value of the trailing high * surrogate is returned.</b> * <li><b>If the next character was a valid low surrogate, the code point value of the * high/low surrogate pair is returned.</b> * <li>If the next character was not a low surrogate value, then {@link * IllegalArgumentException} is thrown. * </ol> * <li>If the first character was a low surrogate value, {@link IllegalArgumentException} is * thrown. 
* </ol> * * @param seq the sequence of characters from which to decode the code point * @param index the index of the first character to decode * @param end the index beyond the last valid character to decode * @return the Unicode code point for the given index or the negated value of the trailing high * surrogate character at the end of the sequence */ protected static final int codePointAt(CharSequence seq, int index, int end) { if (index < end) { char c1 = seq.charAt(index++); if (c1 < Character.MIN_HIGH_SURROGATE || c1 > Character.MAX_LOW_SURROGATE) { // Fast path (first test is probably all we need to do) return c1; } else if (c1 <= Character.MAX_HIGH_SURROGATE) { // If the high surrogate was the last character, return its inverse if (index == end) { return -c1; } // Otherwise look for the low surrogate following it char c2 = seq.charAt(index); if (Character.isLowSurrogate(c2)) { return Character.toCodePoint(c1, c2); } throw new IllegalArgumentException( "Expected low surrogate but got char '" + c2 + "' with value " + (int) c2 + " at index " + index); } else { throw new IllegalArgumentException( "Unexpected low surrogate character '" + c1 + "' with value " + (int) c1 + " at index " + (index - 1)); } } throw new IndexOutOfBoundsException("Index exceeds specified range"); }
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { int mark = src.position(); if (!doneBOM && src.hasRemaining()) { if (dst.remaining() < 4) return CoderResult.OVERFLOW; put(BOM_BIG, dst); doneBOM = true; } try { while (src.hasRemaining()) { char c = src.get(); if (!Character.isSurrogate(c)) { if (dst.remaining() < 4) return CoderResult.OVERFLOW; mark++; put(c, dst); } else if (Character.isHighSurrogate(c)) { if (!src.hasRemaining()) return CoderResult.UNDERFLOW; char low = src.get(); if (Character.isLowSurrogate(low)) { if (dst.remaining() < 4) return CoderResult.OVERFLOW; mark += 2; put(Character.toCodePoint(c, low), dst); } else { return CoderResult.malformedForLength(1); } } else { // assert Character.isLowSurrogate(c); return CoderResult.malformedForLength(1); } } return CoderResult.UNDERFLOW; } finally { src.position(mark); } }
/**
 * Returns the code point that starts at the current {@code index} in {@code data} and moves
 * {@code index} to the first code unit after it.
 *
 * <p>A high surrogate that is immediately followed by a low surrogate is decoded as one
 * supplementary code point and both code units are consumed. A high surrogate followed by
 * anything else is returned unchanged and only that single code unit is consumed, so the
 * following character is not lost.
 *
 * @return the next code point, or the raw {@code char} value for a BMP character or an
 *     unpaired surrogate
 */
public int next() {
    final char ch = data.charAt(index++);
    if (Character.isHighSurrogate(ch)) {
        final char low = data.charAt(index);
        if (Character.isLowSurrogate(low)) {
            // Consume exactly the low surrogate; the pair occupies two code units in total.
            index++;
            return Character.toCodePoint(ch, low);
        }
    }
    return ch;
}
/**
 * Returns the character at the current position of {@code text}, combining a high surrogate
 * with the low surrogate that follows it into a single code point. The position of
 * {@code text} is stepped forward and back again when a high surrogate is inspected.
 *
 * @return the code point at the current position, or the plain {@code char} value when it is
 *     not the start of a valid surrogate pair
 */
int getCurrent() {
    final char lead = text.current();
    if (!Character.isHighSurrogate(lead) || text.getIndex() >= text.getEndIndex()) {
        return lead;
    }

    // Peek at the following character, then step back to where we were.
    final char trail = text.next();
    text.previous();

    return Character.isLowSurrogate(trail) ? Character.toCodePoint(lead, trail) : lead;
}
/**
 * Moves {@code text} back and returns the character found there, combining a low surrogate
 * with a directly preceding high surrogate into a single code point. When a pair is decoded,
 * {@code text} has been moved back twice; otherwise only once.
 *
 * @return the previous code point, or the plain {@code char} value when it is not the end of
 *     a valid surrogate pair
 */
private int getPrevious() {
    final char trail = text.previous();
    if (!Character.isLowSurrogate(trail) || text.getIndex() <= text.getBeginIndex()) {
        return trail;
    }

    final char lead = text.previous();
    if (Character.isHighSurrogate(lead)) {
        return Character.toCodePoint(lead, trail);
    }

    // Not a pair after all: undo the extra step back.
    text.next();
    return trail;
}
/**
 * Converts a string to a byte array by feeding its code points to a {@code TokenBuilder}.
 * A high surrogate directly followed by a low surrogate is added as one combined code point;
 * every other {@code char}, including an unpaired surrogate, is added individually.
 *
 * @param string string to be converted
 * @return byte array produced by {@code TokenBuilder.finish()}
 */
private static byte[] utf8(final String string) {
    final char[] chars = string.toCharArray();
    final int length = chars.length;
    final TokenBuilder tb = new TokenBuilder(length << 1);

    int pos = 0;
    while (pos < length) {
        final char ch = chars[pos++];
        final boolean startsPair =
                Character.isHighSurrogate(ch) && pos < length && Character.isLowSurrogate(chars[pos]);
        final int codePoint = startsPair ? Character.toCodePoint(ch, chars[pos++]) : ch;
        tb.add(codePoint);
    }
    return tb.finish();
}
public int previousCodePoint() { int ch1 = previous(); if (Character.isLowSurrogate((char) ch1)) { int ch2 = previous(); if (Character.isHighSurrogate((char) ch2)) { return Character.toCodePoint((char) ch2, (char) ch1); } else if (ch2 != DONE) { // unmatched trail surrogate so back out next(); } } return ch1; }
/** Note that the column may end of second half of wide character. */ public int findStartOfColumn(int column) { if (column == mColumns) return getSpaceUsed(); int currentColumn = 0; int currentCharIndex = 0; while (true) { // 0<2 1 < 2 int newCharIndex = currentCharIndex; char c = mText[newCharIndex++]; // cci=1, cci=2 boolean isHigh = Character.isHighSurrogate(c); int codePoint = isHigh ? Character.toCodePoint(c, mText[newCharIndex++]) : c; int wcwidth = WcWidth.width(codePoint); // 1, 2 if (wcwidth > 0) { currentColumn += wcwidth; if (currentColumn == column) { while (newCharIndex < mSpaceUsed) { // Skip combining chars. if (Character.isHighSurrogate(mText[newCharIndex])) { if (WcWidth.width(Character.toCodePoint(mText[newCharIndex], mText[newCharIndex + 1])) <= 0) { newCharIndex += 2; } else { break; } } else if (WcWidth.width(mText[newCharIndex]) <= 0) { newCharIndex++; } else { break; } } return newCharIndex; } else if (currentColumn > column) { // Wide column going past end. return currentCharIndex; } } currentCharIndex = newCharIndex; } }
/**
 * Tells whether a character of display width 2 begins exactly at the given column.
 *
 * @param column the display column to test
 * @return {@code true} if a width-2 character starts at {@code column}; {@code false} if
 *     some other character covers that column or the used part of {@code mText} ends first
 */
private boolean wideDisplayCharacterStartingAt(int column) {
    int charIndex = 0;
    int columnsSeen = 0;
    while (charIndex < mSpaceUsed) {
        final char unit = mText[charIndex++];
        final int codePoint;
        if (Character.isHighSurrogate(unit)) {
            codePoint = Character.toCodePoint(unit, mText[charIndex++]);
        } else {
            codePoint = unit;
        }

        final int width = WcWidth.width(codePoint);
        if (width <= 0) {
            // Zero-width characters do not occupy a column of their own.
            continue;
        }
        if (width == 2 && columnsSeen == column) {
            return true;
        }
        columnsSeen += width;
        if (columnsSeen > column) {
            return false;
        }
    }
    return false;
}
/**
 * Scans {@code inputData} once, recording the position of every newline and checking each
 * code point.
 *
 * <p>Unpaired surrogates are reported through {@code recordSurrogateError}. ISO control
 * characters other than CR, LF and TAB are registered as a {@code TTEG02} input error and
 * replaced in place by a space.
 *
 * @param inputData the text to scan; disallowed control characters in it are overwritten
 * @return the newline indices, preceded by a leading {@code -1} entry
 * @throws SnuggleParseException declared by this method; presumably raised by
 *     {@code recordSurrogateError} or error registration -- confirm against those helpers
 */
private int[] calculateNewlineIndicesAndCheckCodePoints(StringBuilder inputData)
        throws SnuggleParseException {
    List<Integer> newlineIndicesBuilder = new ArrayList<Integer>();
    newlineIndicesBuilder.add(Integer.valueOf(-1));

    char previous = 0;
    char current; /* (16 bit char only) */
    final int length = inputData.length();
    for (int pos = 0; pos < length; pos++, previous = current) {
        current = inputData.charAt(pos);
        if (current == '\n') {
            newlineIndicesBuilder.add(Integer.valueOf(pos));
        }

        final int codePoint; /* (Full Unicode code point) */
        if (Character.isHighSurrogate(previous)) {
            if (!Character.isLowSurrogate(current)) {
                /* Error: previous char was a high surrogate without its partner */
                recordSurrogateError(inputData, pos - 1, previous);
                continue;
            }
            codePoint = Character.toCodePoint(previous, current);
        } else if (Character.isLowSurrogate(current)) {
            /* Error: low surrogate without a preceding high surrogate */
            recordSurrogateError(inputData, pos, current);
            continue;
        } else {
            codePoint = current;
        }

        /* Check that we allow this codepoint */
        final boolean allowedControl = codePoint == '\r' || codePoint == '\n' || codePoint == '\t';
        if (Character.isISOControl(codePoint) && !allowedControl) {
            sessionContext.registerError(
                    new InputError(
                            CoreErrorCode.TTEG02,
                            null,
                            Integer.toHexString(codePoint),
                            Integer.valueOf(pos)));
            inputData.setCharAt(pos, ' ');
        }
    }

    /* Make sure last character wasn't surrogate pair starter */
    if (Character.isHighSurrogate(previous)) {
        recordSurrogateError(inputData, inputData.length() - 1, previous);
    }

    /* Finally store newline information */
    final int[] calculatedNewlineIndices = new int[newlineIndicesBuilder.size()];
    int slot = 0;
    for (Integer newlineIndex : newlineIndicesBuilder) {
        calculatedNewlineIndices[slot++] = newlineIndex.intValue();
    }
    return calculatedNewlineIndices;
}
/** * Writes the HTML equivalent of the given plain text to output. For example, {@code * escapeHtmlOnto("1 < 2", w)}, is equivalent to {@code w.append("1 < 2")} but possibly with * fewer smaller appends. Elides code-units that are not valid XML Characters. * * @see <a href="http://www.w3.org/TR/2008/REC-xml-20081126/#charsets">XML Ch. 2.2 - * Characters</a> */ @TCB static void encodeHtmlOnto(String plainText, Appendable output) throws IOException { int n = plainText.length(); int pos = 0; for (int i = 0; i < n; ++i) { char ch = plainText.charAt(i); if (ch < REPLACEMENTS.length) { String repl = REPLACEMENTS[ch]; if (repl != null) { output.append(plainText, pos, i).append(repl); pos = i + 1; } } else if (((char) 0xd800) <= ch) { if (ch <= ((char) 0xdfff)) { char next; if (i + 1 < n && Character.isSurrogatePair(ch, next = plainText.charAt(i + 1))) { // Emit supplemental codepoints as entity so that they cannot // be mis-encoded as UTF-8 of surrogates instead of UTF-8 proper // and get involved in UTF-16/UCS-2 confusion. int codepoint = Character.toCodePoint(ch, next); output.append(plainText, pos, i); appendNumericEntity(codepoint, output); ++i; pos = i + 1; } else { output.append(plainText, pos, i); // Elide the orphaned surrogate. pos = i + 1; } } else if (0xff00 <= ch) { output.append(plainText, pos, i); pos = i + 1; // Is a control character or possible full-width version of a // special character. if ((ch & 0xfffe) == 0xfffe) { // Elide since not an the XML Character. } else { appendNumericEntity(ch, output); } } } } output.append(plainText, pos, n); }
/**
 * Parses a UCS-4 character whose first UTF-16 unit is {@code c}, reading the second unit of
 * a surrogate pair from the given buffer when needed.
 *
 * <p>On success the result is stored in {@code character} and {@code error} is cleared. On
 * failure {@code error} is set: to {@link CoderResult#UNDERFLOW} when {@code c} is a high
 * surrogate and the buffer is empty, or to a malformed-input result of length 1 when the
 * surrogates do not form a valid pair.
 *
 * @param c The first character
 * @param in The source buffer, from which one more character is consumed if {@code c} is a
 *     high surrogate and the buffer has input remaining
 * @return the parsed UCS-4 character, or -1 if parsing failed (see {@code error})
 */
public int parse(char c, CharBuffer in) {
    final boolean isHigh = Character.isHighSurrogate(c);

    if (!isHigh && !Character.isLowSurrogate(c)) {
        // Plain BMP character: nothing more to read.
        character = c;
        error = null;
        return character;
    }

    if (!isHigh) {
        // A low surrogate cannot start a character.
        error = CoderResult.malformedForLength(1);
        return -1;
    }

    if (!in.hasRemaining()) {
        error = CoderResult.UNDERFLOW;
        return -1;
    }

    final char d = in.get();
    if (!Character.isLowSurrogate(d)) {
        error = CoderResult.malformedForLength(1);
        return -1;
    }

    character = Character.toCodePoint(c, d);
    error = null;
    return character;
}
/**
 * Encodes characters from the char buffer into the byte buffer, requiring four free bytes
 * in the byte buffer for every unit written through {@code put}.
 *
 * <p>While {@code doneBOM} is unset and input is available, the value U+FEFF is written
 * first. Inside the loop, the position of the char buffer is always restored on exit to just
 * after the last character that was fully encoded.
 *
 * @param paramCharBuffer the characters to encode
 * @param paramByteBuffer the buffer receiving the encoded bytes
 * @return {@link CoderResult#OVERFLOW} when fewer than four bytes are free in the byte
 *     buffer, {@link CoderResult#UNDERFLOW} when the input is exhausted (including when it
 *     ends on a high surrogate), or a malformed-input result of length 1 for an unpaired
 *     surrogate
 */
protected CoderResult encodeLoop(CharBuffer paramCharBuffer, ByteBuffer paramByteBuffer) {
    // Position of the first character that has not been fully encoded yet.
    int consumedUpTo = paramCharBuffer.position();

    if (!this.doneBOM && paramCharBuffer.hasRemaining()) {
        if (paramByteBuffer.remaining() < 4) {
            return CoderResult.OVERFLOW;
        }
        put(0xFEFF, paramByteBuffer);
        this.doneBOM = true;
    }

    try {
        while (paramCharBuffer.hasRemaining()) {
            final char first = paramCharBuffer.get();

            if (!Character.isSurrogate(first)) {
                if (paramByteBuffer.remaining() < 4) {
                    return CoderResult.OVERFLOW;
                }
                consumedUpTo++;
                put(first, paramByteBuffer);
                continue;
            }

            if (!Character.isHighSurrogate(first)) {
                // A low surrogate that does not follow a high surrogate.
                return CoderResult.malformedForLength(1);
            }
            if (!paramCharBuffer.hasRemaining()) {
                return CoderResult.UNDERFLOW;
            }

            final char second = paramCharBuffer.get();
            if (!Character.isLowSurrogate(second)) {
                return CoderResult.malformedForLength(1);
            }
            if (paramByteBuffer.remaining() < 4) {
                return CoderResult.OVERFLOW;
            }
            consumedUpTo += 2;
            put(Character.toCodePoint(first, second), paramByteBuffer);
        }
        return CoderResult.UNDERFLOW;
    } finally {
        paramCharBuffer.position(consumedUpTo);
    }
}
/**
 * Parses a UCS-4 character whose first UTF-16 unit is {@code c}, reading the second unit of
 * a surrogate pair from the given array when needed.
 *
 * <p>On success the result is stored in {@code character} and {@code error} is cleared. On
 * failure {@code error} is set: to {@link CoderResult#UNDERFLOW} when {@code c} is a high
 * surrogate and fewer than two characters lie between {@code ip} and {@code il}, or to a
 * malformed-input result of length 1 when the surrogates do not form a valid pair.
 *
 * @param c The first character; must equal {@code ia[ip]} (checked by an assertion)
 * @param ia The input array, from which {@code ia[ip + 1]} is read if {@code c} is a high
 *     surrogate
 * @param ip The input index
 * @param il The input limit
 * @return the parsed UCS-4 character, or -1 if parsing failed (see {@code error})
 */
public int parse(char c, char[] ia, int ip, int il) {
    assert (ia[ip] == c);

    final boolean isHigh = Character.isHighSurrogate(c);

    if (!isHigh && !Character.isLowSurrogate(c)) {
        // Plain BMP character: nothing more to read.
        character = c;
        error = null;
        return character;
    }

    if (!isHigh) {
        // A low surrogate cannot start a character.
        error = CoderResult.malformedForLength(1);
        return -1;
    }

    if (il - ip < 2) {
        error = CoderResult.UNDERFLOW;
        return -1;
    }

    final char d = ia[ip + 1];
    if (!Character.isLowSurrogate(d)) {
        error = CoderResult.malformedForLength(1);
        return -1;
    }

    character = Character.toCodePoint(c, d);
    error = null;
    return character;
}