示例#1
0
 /**
  * Copies the characters occupying columns {@code [sourceX1, sourceX2)} of {@code line} into this
  * row, starting at column {@code destinationX}.
  *
  * <p>If {@code sourceX1} falls on the second half of a wide character, the first copied code
  * point is replaced by a space. Code points whose width is not positive are written to the same
  * destination column as the preceding character.
  *
  * @param line the row to read from (may be this row itself)
  * @param sourceX1 first source column, inclusive
  * @param sourceX2 last source column, exclusive
  * @param destinationX column in this row that receives the first copied character
  */
 public void copyInterval(TerminalRow line, int sourceX1, int sourceX2, int destinationX) {
   final int firstIndex = line.findStartOfColumn(sourceX1);
   final int endIndex = line.findStartOfColumn(sourceX2);
   boolean replaceFirstWithSpace =
       sourceX1 > 0 && line.wideDisplayCharacterStartingAt(sourceX1 - 1);

   // When copying within the same row, read from a snapshot of the text
   // (presumably so that setChar() writes cannot disturb characters not yet read).
   final char[] chars;
   if (this == line) {
     chars = Arrays.copyOf(line.mText, line.mText.length);
   } else {
     chars = line.mText;
   }

   // Width of the most recent character with positive width; applied lazily so that
   // following zero-width characters land on the same column.
   int pendingAdvance = 0;
   int i = firstIndex;
   while (i < endIndex) {
     final char unit = chars[i++];
     int codePoint = unit;
     if (Character.isHighSurrogate(unit)) {
       codePoint = Character.toCodePoint(unit, chars[i++]);
     }
     if (replaceFirstWithSpace) {
       // Copying the second half of a wide char is treated as copying whitespace.
       codePoint = ' ';
       replaceFirstWithSpace = false;
     }
     final int width = WcWidth.width(codePoint);
     if (width > 0) {
       destinationX += pendingAdvance;
       sourceX1 += pendingAdvance;
       pendingAdvance = width;
     }
     setChar(destinationX, codePoint, line.getStyle(sourceX1));
   }
 }
示例#2
0
 /**
  * Decodes the Unicode code point that starts at {@code index} in {@code seq}, never reading at
  * or beyond {@code end}.
  *
  * <p>Unlike {@link Character#codePointAt(CharSequence, int)} or {@link String#codePointAt(int)},
  * malformed surrogate sequences are reported rather than silently passed through:
  *
  * <ul>
  *   <li>{@code index >= end}: {@link IndexOutOfBoundsException} is thrown.
  *   <li>A non-surrogate character is returned unchanged.
  *   <li>A high surrogate that is the last character before {@code end} yields the
  *       <em>negated</em> value of that surrogate.
  *   <li>A high surrogate followed by a low surrogate yields the combined code point.
  *   <li>A high surrogate followed by anything else, or a leading low surrogate, causes an {@link
  *       IllegalArgumentException}.
  * </ul>
  *
  * @param seq the sequence of characters from which to decode the code point
  * @param index the index of the first character to decode
  * @param end the index beyond the last valid character to decode
  * @return the decoded code point, or the negated value of a high surrogate that ends the range
  * @throws IndexOutOfBoundsException if {@code index >= end}
  * @throws IllegalArgumentException if an unpaired or misordered surrogate is encountered
  */
 protected static final int codePointAt(CharSequence seq, int index, int end) {
   if (index >= end) {
     throw new IndexOutOfBoundsException("Index exceeds specified range");
   }

   final char first = seq.charAt(index);
   final boolean leadsPair = Character.isHighSurrogate(first);
   if (!leadsPair && !Character.isLowSurrogate(first)) {
     // Common case: an ordinary BMP character.
     return first;
   }

   if (!leadsPair) {
     throw new IllegalArgumentException(
         "Unexpected low surrogate character '"
             + first
             + "' with value "
             + (int) first
             + " at index "
             + index);
   }

   final int nextIndex = index + 1;
   if (nextIndex == end) {
     // The range ends in the middle of a pair: signal it with the negated surrogate.
     return -first;
   }

   final char second = seq.charAt(nextIndex);
   if (!Character.isLowSurrogate(second)) {
     throw new IllegalArgumentException(
         "Expected low surrogate but got char '"
             + second
             + "' with value "
             + (int) second
             + " at index "
             + nextIndex);
   }
   return Character.toCodePoint(first, second);
 }
示例#3
0
 /**
  * Encodes characters from {@code src} into {@code dst}, four output bytes per code point, writing
  * {@code BOM_BIG} first if no byte-order mark has been written yet.
  *
  * <p>On return the position of {@code src} is set just past the last character that was fully
  * encoded, so a partially read surrogate pair or a character that did not fit is left unconsumed.
  *
  * @param src the characters to encode
  * @param dst the buffer receiving the encoded bytes
  * @return {@code UNDERFLOW} when the input is exhausted (or ends with a lone high surrogate),
  *     {@code OVERFLOW} when fewer than four bytes remain in {@code dst}, or a malformed-input
  *     result of length 1 for an unpaired surrogate
  */
 protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
   // Position up to which the input has been successfully encoded.
   int consumed = src.position();

   if (!doneBOM && src.hasRemaining()) {
     if (dst.remaining() < 4) {
       return CoderResult.OVERFLOW;
     }
     put(BOM_BIG, dst);
     doneBOM = true;
   }

   try {
     while (src.hasRemaining()) {
       final char unit = src.get();

       if (Character.isSurrogate(unit)) {
         if (!Character.isHighSurrogate(unit)) {
           // A low surrogate with no preceding high surrogate.
           return CoderResult.malformedForLength(1);
         }
         if (!src.hasRemaining()) {
           // Need the second half of the pair before anything can be written.
           return CoderResult.UNDERFLOW;
         }
         final char trail = src.get();
         if (!Character.isLowSurrogate(trail)) {
           return CoderResult.malformedForLength(1);
         }
         if (dst.remaining() < 4) {
           return CoderResult.OVERFLOW;
         }
         consumed += 2;
         put(Character.toCodePoint(unit, trail), dst);
       } else {
         if (dst.remaining() < 4) {
           return CoderResult.OVERFLOW;
         }
         consumed++;
         put(unit, dst);
       }
     }
     return CoderResult.UNDERFLOW;
   } finally {
     // Rewind over anything read but not encoded.
     src.position(consumed);
   }
 }
示例#4
0
 /**
  * Returns the code point starting at the current {@code index} of {@code data} and advances
  * {@code index} past it: by one char for an ordinary character, by two for a surrogate pair.
  *
  * <p>A high surrogate is always combined with the char that follows it; no check is made that
  * the following char is a low surrogate or that it exists.
  *
  * @return the code point that was read
  */
 public int next() {
   char ch = data.charAt(index++);
   if (Character.isHighSurrogate(ch)) {
     // The second index++ already steps over the low surrogate. The previous
     // additional "index += 2" skipped the two chars following the pair.
     return Character.toCodePoint(ch, data.charAt(index++));
   }
   return ch;
 }
 /**
  * Returns the code point at the current position of {@code text}, combining a high surrogate
  * with the low surrogate that follows it. The position of {@code text} is left where it was.
  */
 int getCurrent() {
   final char lead = text.current();
   if (!Character.isHighSurrogate(lead) || text.getIndex() >= text.getEndIndex()) {
     return (int) lead;
   }
   // Peek at the following char, then step back to the original position.
   final char trail = text.next();
   text.previous();
   return Character.isLowSurrogate(trail) ? Character.toCodePoint(lead, trail) : (int) lead;
 }
 /**
  * Moves {@code text} back and returns the code point found there. When the char stepped onto is
  * a low surrogate preceded by a high surrogate, the position ends on the high surrogate and the
  * combined code point is returned; otherwise only a single char is stepped over.
  */
 private int getPrevious() {
   final char trail = text.previous();
   if (!Character.isLowSurrogate(trail) || text.getIndex() <= text.getBeginIndex()) {
     return (int) trail;
   }
   final char lead = text.previous();
   if (Character.isHighSurrogate(lead)) {
     return Character.toCodePoint(lead, trail);
   }
   // Not a pair after all: undo the extra step back.
   text.next();
   return (int) trail;
 }
示例#7
0
文件: Token.java 项目: nikhi/basex
 /**
  * Converts a string to a UTF8 byte array.
  *
  * <p>The string is walked code point by code point: a high surrogate immediately followed by a
  * low surrogate is combined into one code point, any other char (including an unpaired
  * surrogate) is passed on as is. Each value is added to a {@code TokenBuilder}, whose result is
  * returned.
  *
  * @param string string to be converted
  * @return byte array
  */
 private static byte[] utf8(final String string) {
   final int len = string.length();
   final TokenBuilder tb = new TokenBuilder(len << 1);
   int i = 0;
   while (i < len) {
     final char unit = string.charAt(i++);
     int cp = unit;
     if (Character.isHighSurrogate(unit)
         && i < len
         && Character.isLowSurrogate(string.charAt(i))) {
       cp = Character.toCodePoint(unit, string.charAt(i++));
     }
     tb.add(cp);
   }
   return tb.finish();
 }
示例#8
0
 /**
  * Steps backwards by one code point and returns it.
  *
  * <p>If the value returned by {@code previous()} is a low surrogate, one more step back is taken;
  * when that yields a high surrogate the combined code point is returned. Otherwise the extra
  * step is undone with {@code next()} (unless it returned {@code DONE}) and the first value is
  * returned unchanged.
  *
  * @return the code point before the current position, or whatever {@code previous()} returned
  *     when no surrogate pair was found
  */
 public int previousCodePoint() {
   final int trail = previous();
   if (!Character.isLowSurrogate((char) trail)) {
     return trail;
   }
   final int lead = previous();
   if (Character.isHighSurrogate((char) lead)) {
     return Character.toCodePoint((char) lead, (char) trail);
   }
   if (lead != DONE) {
     // Unmatched trail surrogate, so back out of the extra step.
     next();
   }
   return trail;
 }
示例#9
0
  /**
   * Returns the index into {@code mText} of the first char belonging to the given display column.
   *
   * <p>If {@code column} equals {@code mColumns}, the value of {@code getSpaceUsed()} is returned.
   * If {@code column} falls on the second half of a wide character, the index of that wide
   * character's first char is returned. Characters with non-positive width that directly precede
   * the column's character are skipped, so the returned index points past them.
   *
   * <p>NOTE(review): the scan loop has no bounds check of its own; it appears to rely on {@code
   * column} being reachable within {@code mText} - confirm against callers.
   *
   * @param column the display column to locate
   * @return the index in {@code mText} where that column starts
   */
  public int findStartOfColumn(int column) {
    if (column == mColumns) return getSpaceUsed();

    int currentColumn = 0;
    int currentCharIndex = 0;
    while (true) {
      int newCharIndex = currentCharIndex;
      // Decode one code point; newCharIndex ends up just past it.
      char c = mText[newCharIndex++];
      boolean isHigh = Character.isHighSurrogate(c);
      int codePoint = isHigh ? Character.toCodePoint(c, mText[newCharIndex++]) : c;
      int wcwidth = WcWidth.width(codePoint);
      if (wcwidth > 0) {
        // currentColumn now counts the columns covered up to and including this code point.
        currentColumn += wcwidth;
        if (currentColumn == column) {
          // The requested column starts right after this code point, once any
          // following non-positive-width code points have been stepped over.
          while (newCharIndex < mSpaceUsed) {
            // Skip combining chars.
            if (Character.isHighSurrogate(mText[newCharIndex])) {
              if (WcWidth.width(Character.toCodePoint(mText[newCharIndex], mText[newCharIndex + 1]))
                  <= 0) {
                newCharIndex += 2;
              } else {
                break;
              }
            } else if (WcWidth.width(mText[newCharIndex]) <= 0) {
              newCharIndex++;
            } else {
              break;
            }
          }
          return newCharIndex;
        } else if (currentColumn > column) {
          // This code point extends past the requested column: return where it starts.
          return currentCharIndex;
        }
      }
      currentCharIndex = newCharIndex;
    }
  }
示例#10
0
 /**
  * Tells whether a character of display width 2 begins exactly at the given column.
  *
  * @param column the display column to test
  * @return {@code true} if a width-2 character starts at {@code column}; {@code false} if another
  *     character covers that column or the used part of {@code mText} ends first
  */
 private boolean wideDisplayCharacterStartingAt(int column) {
   int col = 0;
   int idx = 0;
   while (idx < mSpaceUsed) {
     final char unit = mText[idx++];
     int cp = unit;
     if (Character.isHighSurrogate(unit)) {
       cp = Character.toCodePoint(unit, mText[idx++]);
     }
     final int width = WcWidth.width(cp);
     if (width <= 0) {
       // Characters without positive width do not occupy a column.
       continue;
     }
     if (col == column && width == 2) {
       return true;
     }
     col += width;
     if (col > column) {
       return false;
     }
   }
   return false;
 }
示例#11
0
  /**
   * Scans {@code inputData} once, collecting the index of every {@code '\n'} and validating the
   * code points along the way.
   *
   * <p>Unpaired surrogates are passed to {@code recordSurrogateError}. ISO control characters
   * other than CR, LF and TAB are registered as a {@code TTEG02} input error and overwritten with
   * a space in {@code inputData}.
   *
   * @param inputData the input text; may be modified in place as described above
   * @return the newline indices, preceded by a leading {@code -1} entry
   */
  private int[] calculateNewlineIndicesAndCheckCodePoints(StringBuilder inputData)
      throws SnuggleParseException {
    final List<Integer> newlines = new ArrayList<Integer>();
    newlines.add(Integer.valueOf(-1));

    final int length = inputData.length();
    char previous = 0; /* (16 bit char seen in the preceding iteration) */
    for (int i = 0; i < length; i++) {
      final char current = inputData.charAt(i);
      final char lead = previous;
      previous = current;

      if (current == '\n') {
        newlines.add(Integer.valueOf(i));
      }

      final int codePoint; /* (Full Unicode code point) */
      if (Character.isHighSurrogate(lead)) {
        if (!Character.isLowSurrogate(current)) {
          /* Error: preceding char was an unpaired high surrogate */
          recordSurrogateError(inputData, i - 1, lead);
          continue;
        }
        codePoint = Character.toCodePoint(lead, current);
      } else if (Character.isLowSurrogate(current)) {
        /* Error: low surrogate without a preceding high surrogate */
        recordSurrogateError(inputData, i, current);
        continue;
      } else {
        codePoint = current;
      }

      /* Check that we allow this codepoint */
      final boolean allowedWhitespace =
          codePoint == '\r' || codePoint == '\n' || codePoint == '\t';
      if (Character.isISOControl(codePoint) && !allowedWhitespace) {
        sessionContext.registerError(
            new InputError(
                CoreErrorCode.TTEG02, null, Integer.toHexString(codePoint), Integer.valueOf(i)));
        inputData.setCharAt(i, ' ');
      }
    }

    /* Make sure the final character wasn't a surrogate pair starter */
    if (Character.isHighSurrogate(previous)) {
      recordSurrogateError(inputData, inputData.length() - 1, previous);
    }

    /* Finally unbox the newline information into an array */
    final int[] result = new int[newlines.size()];
    int slot = 0;
    for (Integer newlineIndex : newlines) {
      result[slot++] = newlineIndex.intValue();
    }
    return result;
  }
示例#12
0
 /**
  * Writes the HTML equivalent of the given plain text to output. For example, {@code
  * encodeHtmlOnto("1 < 2", w)}, is equivalent to {@code w.append("1 &lt; 2")} but possibly with
  * fewer smaller appends. Elides code-units that are not valid XML Characters.
  *
  * <p>Runs of characters that need no treatment are appended in one piece. Characters that have
  * an entry in {@code REPLACEMENTS} are substituted, surrogate pairs and code units from {@code
  * 0xff00} upwards are emitted as numeric entities, and orphaned surrogates as well as code units
  * whose upper 15 bits are all set ({@code 0xfffe}, {@code 0xffff}) are dropped.
  *
  * @param plainText the text to encode
  * @param output receives the encoded text
  * @throws IOException if appending to {@code output} fails
  * @see <a href="http://www.w3.org/TR/2008/REC-xml-20081126/#charsets">XML Ch. 2.2 -
  *     Characters</a>
  */
 @TCB
 static void encodeHtmlOnto(String plainText, Appendable output) throws IOException {
   final int length = plainText.length();
   // Start of the pending run of characters that can be copied through verbatim.
   int flushed = 0;
   for (int i = 0; i < length; ++i) {
     final char ch = plainText.charAt(i);
     if (ch < REPLACEMENTS.length) {
       final String replacement = REPLACEMENTS[ch];
       if (replacement == null) {
         continue;
       }
       output.append(plainText, flushed, i).append(replacement);
       flushed = i + 1;
     } else if (Character.MIN_SURROGATE <= ch && ch <= Character.MAX_SURROGATE) {
       output.append(plainText, flushed, i);
       if (i + 1 < length && Character.isSurrogatePair(ch, plainText.charAt(i + 1))) {
         // Emit supplemental codepoints as entity so that they cannot
         // be mis-encoded as UTF-8 of surrogates instead of UTF-8 proper
         // and get involved in UTF-16/UCS-2 confusion.
         appendNumericEntity(Character.toCodePoint(ch, plainText.charAt(i + 1)), output);
         ++i;
       }
       // For an orphaned surrogate nothing is emitted, which elides it.
       flushed = i + 1;
     } else if (ch >= 0xff00) {
       output.append(plainText, flushed, i);
       flushed = i + 1;
       // Is a control character or possible full-width version of a
       // special character. 0xfffe and 0xffff are elided since they are
       // not XML Characters.
       if ((ch & 0xfffe) != 0xfffe) {
         appendNumericEntity(ch, output);
       }
     }
   }
   output.append(plainText, flushed, length);
 }
示例#13
0
 /**
  * Parses a UCS-4 character from the given source buffer, handling surrogates.
  *
  * <p>On success the code point is stored in {@code character} and {@code error} is cleared. On
  * failure {@code error} is set to {@code UNDERFLOW} (a high surrogate with nothing left in the
  * buffer) or to a malformed-input result of length 1 (an unpaired surrogate).
  *
  * @param c The first character
  * @param in The source buffer, from which one more character will be consumed if c is a high
  *     surrogate
  * @return Either the parsed UCS-4 character, or -1, in which case {@code error} holds a
  *     descriptive result object
  */
 public int parse(char c, CharBuffer in) {
   if (Character.isLowSurrogate(c)) {
     // A low surrogate may never come first.
     error = CoderResult.malformedForLength(1);
     return -1;
   }
   if (!Character.isHighSurrogate(c)) {
     character = c;
     error = null;
     return character;
   }

   // c is a high surrogate: its partner has to come from the buffer.
   if (!in.hasRemaining()) {
     error = CoderResult.UNDERFLOW;
     return -1;
   }
   final char trail = in.get();
   if (!Character.isLowSurrogate(trail)) {
     error = CoderResult.malformedForLength(1);
     return -1;
   }
   character = Character.toCodePoint(c, trail);
   error = null;
   return character;
 }
 /**
  * Encodes characters from the char buffer into the byte buffer, four output bytes per code
  * point, writing the value {@code 0xFEFF} first if no byte-order mark has been written yet.
  *
  * <p>On return the position of the char buffer is set just past the last character that was
  * fully encoded.
  *
  * @param paramCharBuffer the characters to encode
  * @param paramByteBuffer the buffer receiving the encoded bytes
  * @return {@code UNDERFLOW} when the input is exhausted (or ends with a lone high surrogate),
  *     {@code OVERFLOW} when fewer than four bytes remain in the output, or a malformed-input
  *     result of length 1 for an unpaired surrogate
  */
 protected CoderResult encodeLoop(CharBuffer paramCharBuffer, ByteBuffer paramByteBuffer) {
   // Input position up to which everything has been encoded.
   int encodedUpTo = paramCharBuffer.position();

   if (!this.doneBOM && paramCharBuffer.hasRemaining()) {
     if (paramByteBuffer.remaining() < 4) {
       return CoderResult.OVERFLOW;
     }
     put(0xFEFF, paramByteBuffer);
     this.doneBOM = true;
   }

   try {
     while (paramCharBuffer.hasRemaining()) {
       final char first = paramCharBuffer.get();

       if (!Character.isSurrogate(first)) {
         if (paramByteBuffer.remaining() < 4) {
           return CoderResult.OVERFLOW;
         }
         encodedUpTo++;
         put(first, paramByteBuffer);
         continue;
       }

       if (!Character.isHighSurrogate(first)) {
         // Low surrogate without a preceding high surrogate.
         return CoderResult.malformedForLength(1);
       }
       if (!paramCharBuffer.hasRemaining()) {
         return CoderResult.UNDERFLOW;
       }

       final char second = paramCharBuffer.get();
       if (!Character.isLowSurrogate(second)) {
         return CoderResult.malformedForLength(1);
       }
       if (paramByteBuffer.remaining() < 4) {
         return CoderResult.OVERFLOW;
       }
       encodedUpTo += 2;
       put(Character.toCodePoint(first, second), paramByteBuffer);
     }
     return CoderResult.UNDERFLOW;
   } finally {
     // Rewind over anything read but not encoded.
     paramCharBuffer.position(encodedUpTo);
   }
 }
示例#15
0
 /**
  * Parses a UCS-4 character from the given source array, handling surrogates.
  *
  * <p>On success the code point is stored in {@code character} and {@code error} is cleared. On
  * failure {@code error} is set to {@code UNDERFLOW} (a high surrogate with fewer than two chars
  * left before the limit) or to a malformed-input result of length 1 (an unpaired surrogate).
  *
  * @param c The first character; must equal {@code ia[ip]}
  * @param ia The input array, from which one more character will be read if c is a high
  *     surrogate
  * @param ip The input index
  * @param il The input limit
  * @return Either the parsed UCS-4 character, or -1, in which case {@code error} holds a
  *     descriptive result object
  */
 public int parse(char c, char[] ia, int ip, int il) {
   assert (ia[ip] == c);

   if (Character.isLowSurrogate(c)) {
     // A low surrogate may never come first.
     error = CoderResult.malformedForLength(1);
     return -1;
   }
   if (!Character.isHighSurrogate(c)) {
     character = c;
     error = null;
     return character;
   }

   // c is a high surrogate: its partner must lie before the limit.
   if (il - ip < 2) {
     error = CoderResult.UNDERFLOW;
     return -1;
   }
   final char trail = ia[ip + 1];
   if (!Character.isLowSurrogate(trail)) {
     error = CoderResult.malformedForLength(1);
     return -1;
   }
   character = Character.toCodePoint(c, trail);
   error = null;
   return character;
 }