예제 #1
 // Inserts text after scanning it for code points that are neither digits nor '-'.
 // NOTE(review): this excerpt appears truncated -- the bodies of the two nested ifs and every
 // closing brace are missing, so what is done with a rejected code point cannot be confirmed
 // from here (presumably it is deleted from the builder before the text is forwarded).
 public void insertString(FilterBypass fb, int offset, String string, AttributeSet attr)
     throws BadLocationException {
   // Work on a mutable copy of the incoming text.
   StringBuilder builder = new StringBuilder(string);
   // Walk the copy from the last index down to the first.
   for (int i = builder.length() - 1; i >= 0; i--) {
     int cp = builder.codePointAt(i);
     // Entered for any code point that is not a digit and not '-'.
     if (!Character.isDigit(cp) && cp != '-') {
       // A supplementary code point occupies two chars in the builder.
       if (Character.isSupplementaryCodePoint(cp)) {
   // Hand the builder's current contents to the superclass implementation.
   super.insertString(fb, offset, builder.toString(), attr);
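   * Use this method instead of {@link #setText(CharSequence)}.
   * @param cs the text to display; when it is a {@code SpannableString}, its {@code ImageSpan}
   *     ranges are recorded separately from the plain characters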
  public void setMText(CharSequence cs) {
    // Stores the given text, records the start/end of every ImageSpan it carries, and then
    // walks the text one code point at a time, appending each code point to obList as a String.
    // NOTE(review): this excerpt appears truncated -- the statement that adds each IS to isList,
    // the bodies of the empty-looking else branches, any update of j, and all closing braces
    // are not visible here; confirm against the full source before relying on these notes.
    text = cs;

    //	contentList.clear();

    ArrayList<IS> isList = new ArrayList<MTextView.IS>();
    useDefault = false;
    // Only a SpannableString is inspected for image spans.
    if (cs instanceof SpannableString) {
      SpannableString ss = (SpannableString) cs;
      ImageSpan[] imageSpans = ss.getSpans(0, ss.length(), ImageSpan.class);
      for (int i = 0; i < imageSpans.length; i++) {
        int s = ss.getSpanStart(imageSpans[i]);
        int e = ss.getSpanEnd(imageSpans[i]);
        // Capture the span together with its start and end offsets.
        IS iS = new IS();
        iS.is = imageSpans[i];
        iS.start = s;
        iS.end = e;

    String str = cs.toString();

    // i indexes chars of the text; j indexes entries of isList.
    for (int i = 0, j = 0; i < cs.length(); ) {
      if (j < isList.size()) {
        IS is = isList.get(j);
        // Text located before the current image span is consumed code point by code point.
        if (i < is.start) {
          Integer cp = str.codePointAt(i);
          // Support supplementary characters: they take two chars, so advance by two.
          if (Character.isSupplementaryCodePoint(cp)) {
            i += 2;
          } else {

          obList.add(new String(Character.toChars(cp)));

        // At or past the span's start: jump straight to the span's end offset.
        } else if (i >= is.start) {
          i = is.end;
      // No image span left to consider: same per-code-point handling as above.
      } else {
        Integer cp = str.codePointAt(i);
        if (Character.isSupplementaryCodePoint(cp)) {
          i += 2;
        } else {

        obList.add(new String(Character.toChars(cp)));

예제 #3
 // Checks a URI string one code point at a time.
 // NOTE(review): this excerpt appears truncated -- the bodies of the checks below and the
 // closing braces are missing, so which checks accept and which reject a code point cannot
 // be confirmed from here; only the trailing "return false" / "return true" are visible.
 // @VisibleForTesting
 public static boolean isValidUriCharset(String uri) {
   int len = uri.length();
   int i = 0;
   while (i < len) {
     int codePoint = uri.codePointAt(i);
     // Advance by one or two chars depending on the code point just read.
     i += Character.charCount(codePoint);
     // Code points outside the BMP are tested first.
     if (Character.isSupplementaryCodePoint(codePoint)) {
     // Lookup in the HREF_DISCRETE_UCSCHAR string of individually listed characters.
     if (HREF_DISCRETE_UCSCHAR.indexOf(codePoint) >= 0) {
     // iunreserved ranges
     if (('a' <= codePoint && codePoint <= 'z')
         || ('A' <= codePoint && codePoint <= 'Z')
         || ('0' <= codePoint && codePoint <= '9')) {
     // href-ucschar ranges
     if ((0 <= codePoint && codePoint <= 0x1F)
         || (0x7F <= codePoint && codePoint <= 0xD7FF)
         || (0xE000 <= codePoint && codePoint <= 0xFFFD)) {
     return false;
   return true;
 // Decides whether a code point must be escaped inside a JavaScript string.
 // NOTE(review): closing braces appear to be missing from this excerpt.
 private boolean mustEscapeCharInJsString(int codepoint) {
   // Only code points that fit in a single char are looked up in JS_ESCAPE_CHARS.
   if (!Character.isSupplementaryCodePoint(codepoint)) {
     char c = (char) codepoint;
     return JS_ESCAPE_CHARS.contains(c);
   // Presumably reached only for supplementary code points, which are never escaped here.
   return false;
예제 #5
   * Returns the escaped form of a given literal string, starting at the given index. This method is
   * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
   * protected to allow subclasses to override the fastpath escaping function to inline their
   * escaping test. See {@link CharEscaperBuilder} for an example usage.
   * <p>This method is not reentrant and may only be invoked by the top level {@link
   * #escape(String)} method.
   * @param s the literal string to be escaped
   * @param index the index to start escaping from
   * @return the escaped form of {@code s}
   * @throws NullPointerException if {@code s} is null
   * @throws IllegalArgumentException if invalid surrogate characters are encountered
  protected final String escapeSlow(String s, int index) {
    // NOTE(review): this excerpt appears truncated -- the closing braces of the if/while
    // blocks below are not visible, so the nesting is inferred from indentation and from the
    // existing comments.
    int end = s.length();

    // Get a destination buffer and setup some loop variables.
    char[] dest = DEST_TL.get();
    // Next free position in dest.
    int destIndex = 0;
    // Start of the run of input chars that has not been copied into dest yet.
    int unescapedChunkStart = 0;

    while (index < end) {
      int cp = codePointAt(s, index, end);
      // A negative result is reported as a dangling high surrogate at the end of the input.
      if (cp < 0) {
        throw new IllegalArgumentException("Trailing high surrogate at end of input");
      // It is possible for this to return null because nextEscapeIndex() may
      // (for performance reasons) yield some false positives but it must never
      // give false negatives.
      char[] escaped = escape(cp);
      // Index of the char following this code point (two chars for supplementary ones).
      int nextIndex = index + (Character.isSupplementaryCodePoint(cp) ? 2 : 1);
      if (escaped != null) {
        // Number of unescaped chars pending since the previous replacement.
        int charsSkipped = index - unescapedChunkStart;

        // This is the size needed to add the replacement, not the full
        // size needed by the string.  We only regrow when we absolutely must.
        int sizeNeeded = destIndex + charsSkipped + escaped.length;
        if (dest.length < sizeNeeded) {
          // Grow with room for the rest of the input plus DEST_PAD extra chars.
          int destLength = sizeNeeded + (end - index) + DEST_PAD;
          dest = growBuffer(dest, destIndex, destLength);
        // If we have skipped any characters, we need to copy them now.
        if (charsSkipped > 0) {
          s.getChars(unescapedChunkStart, index, dest, destIndex);
          destIndex += charsSkipped;
        // An empty replacement array copies nothing, i.e. the code point is dropped.
        if (escaped.length > 0) {
          System.arraycopy(escaped, 0, dest, destIndex, escaped.length);
          destIndex += escaped.length;
        // If we dealt with an escaped character, reset the unescaped range.
        unescapedChunkStart = nextIndex;
      // Jump ahead to the next candidate position reported by nextEscapeIndex.
      index = nextEscapeIndex(s, nextIndex, end);

    // Process trailing unescaped characters - no need to account for escaped
    // length or padding the allocation.
    int charsSkipped = end - unescapedChunkStart;
    if (charsSkipped > 0) {
      int endIndex = destIndex + charsSkipped;
      if (dest.length < endIndex) {
        dest = growBuffer(dest, destIndex, endIndex);
      s.getChars(unescapedChunkStart, end, dest, destIndex);
      destIndex = endIndex;
    // Only the first destIndex chars of the buffer hold output.
    return new String(dest, 0, destIndex);
예제 #6
  * Scans a sub-sequence of characters from a given {@link CharSequence}, returning the index of
  * the next character that requires escaping.
  * <p><b>Note:</b> When implementing an escaper, it is a good idea to override this method for
  * efficiency. The base class implementation determines successive Unicode code points and invokes
  * {@link #escape(int)} for each of them. If the semantics of your escaper are such that code
  * points in the supplementary range are either all escaped or all unescaped, this method can be
  * implemented more efficiently using {@link CharSequence#charAt(int)}.
  * <p>Note however that if your escaper does not escape characters in the supplementary range, you
  * should either continue to validate the correctness of any surrogate characters encountered or
  * provide a clear warning to users that your escaper does not validate its input.
  * <p>See {@link PercentEscaper} for an example.
  * @param csq a sequence of characters
  * @param start the index of the first character to be scanned
  * @param end the index immediately after the last character to be scanned
  * @throws IllegalArgumentException if the scanned sub-sequence of {@code csq} contains invalid
  *     surrogate pairs
 protected int nextEscapeIndex(CharSequence csq, int start, int end) {
   // NOTE(review): this excerpt appears truncated -- the body of the if below (presumably a
   // break out of the loop) and the closing braces are not visible.
   int index = start;
   while (index < end) {
     int cp = codePointAt(csq, index, end);
     // Presumably the stop condition: codePointAt returned a negative value, or escape()
     // has a replacement for this code point.
     if (cp < 0 || escape(cp) != null) {
     // Step over the code point: two chars if supplementary, otherwise one.
     index += Character.isSupplementaryCodePoint(cp) ? 2 : 1;
   return index;
   * Appends the Unicode hex escape sequence for the given code point (backslash + 'u' + 4 hex
   * digits) to the given StringBuilder.
   * <p>Note: May append 2 escape sequences (surrogate pair) in the case of a supplementary
   * character (outside the Unicode BMP).
   * <p>Adapted from StringUtil.appendHexJavaScriptRepresentation().
   * @param out The StringBuilder to append to.
   * @param codePoint The Unicode code point whose hex escape sequence to append.
  public static void appendHexEscape(StringBuilder out, int codePoint) {

    if (Character.isSupplementaryCodePoint(codePoint)) {
      // Handle supplementary unicode values which are not representable in
      // javascript.  We deal with these by escaping them as two 4B sequences
      // so that they will round-trip properly when sent from java to javascript
      // and back.
      char[] surrogates = Character.toChars(codePoint);
      // Recurse once per surrogate; each surrogate is a 16-bit value, so the recursive
      // calls take the else branch.
      appendHexEscape(out, surrogates[0]);
      appendHexEscape(out, surrogates[1]);

    } else {
      // NOTE(review): the head of this call chain (presumably an append of the backslash-u
      // prefix on out) is missing from this excerpt, as are the closing braces.
      // The four appends emit the low 16 bits as hex digits, most significant nibble first.
          .append(HEX_DIGITS[(codePoint >>> 12) & 0xF])
          .append(HEX_DIGITS[(codePoint >>> 8) & 0xF])
          .append(HEX_DIGITS[(codePoint >>> 4) & 0xF])
          .append(HEX_DIGITS[codePoint & 0xF]);
예제 #8
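  * Replaces each supplementary character (a code point above U+FFFF) with a {@code ?} character.
  * @param text the string to process; may be {@code null}
  * @return the processed string, or {@code null} if {@code text} is {@code null}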
 public static String replaceSupplementaryCharacters(final String text) {
   // NOTE(review): this excerpt appears truncated -- the bodies of the if/else inside the
   // loop and the closing braces are not visible, so the actual append logic (and whether i
   // is advanced past the second char of a surrogate pair) cannot be confirmed from here.
   // A null input is passed straight back.
   if (text == null) {
     return null;
   final int len = text.length();
   boolean isSupplementary = false;
   final StringBuilder result = new StringBuilder();
   for (int i = 0; i < len; i++) {
     // Code point that starts at char index i.
     final int cp = Character.codePointAt(text, i);
     isSupplementary = Character.isSupplementaryCodePoint(cp);
     if (isSupplementary) {
     } else {
   return result.toString();
예제 #9
  // Builds a new string while walking str code point by code point; inputs for which
  // TextUtils.isEmpty is true, or in which no code point occupies two chars, are returned as-is.
  // NOTE(review): this excerpt appears truncated -- the body of the supplementary-code-point
  // check, whatever appends to sb, and the closing braces are not visible, so how such code
  // points are treated cannot be confirmed from here.
  public static final String filterUCS4(String str) {
    if (TextUtils.isEmpty(str)) {
      return str;

    // Equal code-point and char counts mean every code point fits in a single char.
    if (str.codePointCount(0, str.length()) == str.length()) {
      return str;

    StringBuilder sb = new StringBuilder();

    int index = 0;
    while (index < str.length()) {
      int codePoint = str.codePointAt(index);
      // Advance by one or two chars depending on the code point just read.
      index += Character.charCount(codePoint);
      if (Character.isSupplementaryCodePoint(codePoint)) {


    return sb.toString();
예제 #10
    // Decodes len bytes of sa, starting at index sp, as UTF-8 into da (written from index 0)
    // and returns the number of chars written.
    // NOTE(review): this excerpt appears truncated -- many closing braces, the cast in front
    // of the three-byte expression and the operator joining the two four-byte conditions are
    // missing, so the nesting described in the added comments is inferred from indentation
    // and should be confirmed against the full source.
    // returns -1 if there is/are malformed byte(s) and the
    // "action" for malformed input is not REPLACE.
    public int decode(byte[] sa, int sp, int len, char[] da) {
      // Exclusive end index of the input range.
      final int sl = sp + len;
      // Write position in da.
      int dp = 0;
      // Upper bound for the ASCII fast path: limited by both input length and output capacity.
      int dlASCII = Math.min(len, da.length);
      ByteBuffer bb = null; // only necessary if malformed

      // ASCII only optimized loop
      while (dp < dlASCII && sa[sp] >= 0) da[dp++] = (char) sa[sp++];

      while (sp < sl) {
        // Bytes are signed, so any lead byte with the high bit set is negative here.
        int b1 = sa[sp++];
        if (b1 >= 0) {
          // 1 byte, 7 bits: 0xxxxxxx
          da[dp++] = (char) b1;
        } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
          // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
          if (sp < sl) {
            int b2 = sa[sp++];
            if (isNotContinuation(b2)) {
              if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
              da[dp++] = replacement().charAt(0);
              sp--; // malformedN(bb, 2) always returns 1
            } else {
              // XOR with the combined marker bits strips the 110/10 prefixes in one step.
              da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
          // Presumably reached when the input ends right after the lead byte.
          if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
          da[dp++] = replacement().charAt(0);
          return dp;
        } else if ((b1 >> 4) == -2) {
          // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
          if (sp + 1 < sl) {
            int b2 = sa[sp++];
            int b3 = sa[sp++];
            if (isMalformed3(b1, b2, b3)) {
              if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
              da[dp++] = replacement().charAt(0);
              // Rewind to the lead byte, then skip the length reported by malformedN.
              sp -= 3;
              bb = getByteBuffer(bb, sa, sp);
              sp += malformedN(bb, 3).length();
            } else {
              char c =
                      ((b1 << 12)
                          ^ (b2 << 6)
                          ^ (b3 ^ (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
              // A surrogate value encoded as three bytes is treated as malformed input.
              if (isSurrogate(c)) {
                if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
                da[dp++] = replacement().charAt(0);
              } else {
                da[dp++] = c;
          // Presumably reached when fewer than two bytes follow the lead byte.
          if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
          if (sp < sl && isMalformed3_2(b1, sa[sp])) {
            da[dp++] = replacement().charAt(0);
          da[dp++] = replacement().charAt(0);
          return dp;
        } else if ((b1 >> 3) == -2) {
          // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          if (sp + 2 < sl) {
            int b2 = sa[sp++];
            int b3 = sa[sp++];
            int b4 = sa[sp++];
            int uc =
                ((b1 << 18)
                    ^ (b2 << 12)
                    ^ (b3 << 6)
                    ^ (b4
                        ^ (((byte) 0xF0 << 18)
                            ^ ((byte) 0x80 << 12)
                            ^ ((byte) 0x80 << 6)
                            ^ ((byte) 0x80))));
            if (isMalformed4(b2, b3, b4)
                // shortest form check
                !Character.isSupplementaryCodePoint(uc)) {
              if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
              da[dp++] = replacement().charAt(0);
              // Rewind to the lead byte, then skip the length reported by malformedN.
              sp -= 4;
              bb = getByteBuffer(bb, sa, sp);
              sp += malformedN(bb, 4).length();
            } else {
              // A supplementary code point is written as a surrogate pair (two chars).
              da[dp++] = highSurrogate(uc);
              da[dp++] = lowSurrogate(uc);
          // Presumably reached when fewer than three bytes follow the lead byte.
          if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;

          if (sp < sl && isMalformed4_2(b1, sa[sp])) {
            da[dp++] = replacement().charAt(0);
          if (sp < sl && isMalformed4_3(sa[sp])) {
            da[dp++] = replacement().charAt(0);
          da[dp++] = replacement().charAt(0);
          return dp;
        } else {
          // Lead byte matches none of the 1- to 4-byte patterns above.
          if (malformedInputAction() != CodingErrorAction.REPLACE) return -1;
          da[dp++] = replacement().charAt(0);
      return dp;
예제 #11
 // Decodes UTF-8 bytes from src into dst using the buffers' get/put methods; mark tracks the
 // position of the first byte of the sequence currently being decoded.
 // NOTE(review): this excerpt appears truncated -- closing braces, the mark update after a
 // one-byte character, the cast in front of the three-byte expression, the operator joining
 // the two four-byte conditions and the dst.put calls for three- and four-byte sequences are
 // not visible; confirm against the full source.
 private CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
   int mark = src.position();
   int limit = src.limit();
   while (mark < limit) {
     // Bytes are signed, so any lead byte with the high bit set is negative here.
     int b1 = src.get();
     if (b1 >= 0) {
       // 1 byte, 7 bits: 0xxxxxxx
       if (dst.remaining() < 1) return xflow(src, mark, 1); // overflow
       dst.put((char) b1);
     } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
       // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
       // Not enough input or no room for one char: hand the decision to xflow.
       if (limit - mark < 2 || dst.remaining() < 1) return xflow(src, mark, 2);
       int b2 = src.get();
       if (isNotContinuation(b2)) return malformedForLength(src, mark, 1);
       // XOR with the combined marker bits strips the 110/10 prefixes in one step.
       dst.put((char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80))));
       mark += 2;
     } else if ((b1 >> 4) == -2) {
       // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
       int srcRemaining = limit - mark;
       if (srcRemaining < 3 || dst.remaining() < 1) {
         // With a second byte available, a bad second byte is reported as malformed at once.
         if (srcRemaining > 1 && isMalformed3_2(b1, src.get()))
           return malformedForLength(src, mark, 1);
         return xflow(src, mark, 3);
       int b2 = src.get();
       int b3 = src.get();
       if (isMalformed3(b1, b2, b3)) return malformed(src, mark, 3);
       char c =
               ((b1 << 12)
                   ^ (b2 << 6)
                   ^ (b3 ^ (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
       // A surrogate value encoded as three bytes is treated as malformed input.
       if (isSurrogate(c)) return malformedForLength(src, mark, 3);
       mark += 3;
     } else if ((b1 >> 3) == -2) {
       // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
       int srcRemaining = limit - mark;
       // A four-byte sequence needs room for two chars (a surrogate pair) in dst.
       if (srcRemaining < 4 || dst.remaining() < 2) {
         if (srcRemaining > 1 && isMalformed4_2(b1, src.get()))
           return malformedForLength(src, mark, 1);
         if (srcRemaining > 2 && isMalformed4_3(src.get()))
           return malformedForLength(src, mark, 2);
         return xflow(src, mark, 4);
       int b2 = src.get();
       int b3 = src.get();
       int b4 = src.get();
       int uc =
           ((b1 << 18)
               ^ (b2 << 12)
               ^ (b3 << 6)
               ^ (b4
                   ^ (((byte) 0xF0 << 18)
                       ^ ((byte) 0x80 << 12)
                       ^ ((byte) 0x80 << 6)
                       ^ ((byte) 0x80))));
       if (isMalformed4(b2, b3, b4)
           // shortest form check
           !Character.isSupplementaryCodePoint(uc)) {
         return malformed(src, mark, 4);
       mark += 4;
     } else {
       // Lead byte matches none of the 1- to 4-byte patterns above.
       return malformed(src, mark, 1);
   // All input consumed: xflow is called with a required length of 0.
   return xflow(src, mark, 0);
예제 #12
    // Decodes UTF-8 bytes from src into dst by working directly on the buffers' backing
    // arrays; sp/sl are the source position/limit and dp/dl the destination position/limit,
    // all expressed as array indices (arrayOffset included).
    // NOTE(review): this excerpt appears truncated -- closing braces, the sp update after a
    // one-byte character, the cast in front of the three-byte expression and the operator
    // joining the two four-byte conditions are not visible; confirm against the full source.
    private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
      // This method is optimized for ASCII input.
      byte[] sa = src.array();
      int sp = src.arrayOffset() + src.position();
      int sl = src.arrayOffset() + src.limit();

      char[] da = dst.array();
      int dp = dst.arrayOffset() + dst.position();
      int dl = dst.arrayOffset() + dst.limit();
      // Upper bound for the ASCII fast path: limited by remaining input and remaining output.
      int dlASCII = dp + Math.min(sl - sp, dl - dp);

      // ASCII only loop
      while (dp < dlASCII && sa[sp] >= 0) da[dp++] = (char) sa[sp++];
      while (sp < sl) {
        // Peek at the lead byte; sp is advanced only once a whole sequence has been accepted.
        int b1 = sa[sp];
        if (b1 >= 0) {
          // 1 byte, 7 bits: 0xxxxxxx
          if (dp >= dl) return xflow(src, sp, sl, dst, dp, 1);
          da[dp++] = (char) b1;
        } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
          // 2 bytes, 11 bits: 110xxxxx 10xxxxxx
          if (sl - sp < 2 || dp >= dl) return xflow(src, sp, sl, dst, dp, 2);
          int b2 = sa[sp + 1];
          if (isNotContinuation(b2)) return malformedForLength(src, sp, dst, dp, 1);
          // XOR with the combined marker bits strips the 110/10 prefixes in one step.
          da[dp++] = (char) (((b1 << 6) ^ b2) ^ (((byte) 0xC0 << 6) ^ ((byte) 0x80)));
          sp += 2;
        } else if ((b1 >> 4) == -2) {
          // 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
          int srcRemaining = sl - sp;
          if (srcRemaining < 3 || dp >= dl) {
            // With a second byte available, a bad second byte is reported as malformed at once.
            if (srcRemaining > 1 && isMalformed3_2(b1, sa[sp + 1]))
              return malformedForLength(src, sp, dst, dp, 1);
            return xflow(src, sp, sl, dst, dp, 3);
          int b2 = sa[sp + 1];
          int b3 = sa[sp + 2];
          if (isMalformed3(b1, b2, b3)) return malformed(src, sp, dst, dp, 3);
          char c =
                  ((b1 << 12)
                      ^ (b2 << 6)
                      ^ (b3 ^ (((byte) 0xE0 << 12) ^ ((byte) 0x80 << 6) ^ ((byte) 0x80))));
          // A surrogate value encoded as three bytes is treated as malformed input.
          if (isSurrogate(c)) return malformedForLength(src, sp, dst, dp, 3);
          da[dp++] = c;
          sp += 3;
        } else if ((b1 >> 3) == -2) {
          // 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          int srcRemaining = sl - sp;
          // A four-byte sequence needs room for two chars (a surrogate pair) in da.
          if (srcRemaining < 4 || dl - dp < 2) {
            if (srcRemaining > 1 && isMalformed4_2(b1, sa[sp + 1]))
              return malformedForLength(src, sp, dst, dp, 1);
            if (srcRemaining > 2 && isMalformed4_3(sa[sp + 2]))
              return malformedForLength(src, sp, dst, dp, 2);
            return xflow(src, sp, sl, dst, dp, 4);
          int b2 = sa[sp + 1];
          int b3 = sa[sp + 2];
          int b4 = sa[sp + 3];
          int uc =
              ((b1 << 18)
                  ^ (b2 << 12)
                  ^ (b3 << 6)
                  ^ (b4
                      ^ (((byte) 0xF0 << 18)
                          ^ ((byte) 0x80 << 12)
                          ^ ((byte) 0x80 << 6)
                          ^ ((byte) 0x80))));
          if (isMalformed4(b2, b3, b4)
              // shortest form check
              !Character.isSupplementaryCodePoint(uc)) {
            return malformed(src, sp, dst, dp, 4);
          da[dp++] = highSurrogate(uc);
          da[dp++] = lowSurrogate(uc);
          sp += 4;
        // Lead byte matches none of the 1- to 4-byte patterns above.
        } else return malformed(src, sp, dst, dp, 1);
      // All input consumed: xflow is called with a required length of 0.
      return xflow(src, sp, sl, dst, dp, 0);
   * Get the number of brute-force iterations needed to arrive at the given password. See the class
   * description for the algorithm used to determine the count.
   * @param passwordPlaintext the password to calculate the count for
   * @param bypassLengthLimitCheck true to ignore the hard-coded password length limit
   * @return a <code>BigInteger</code> representing the number of iterations
   * @throws MaximumPasswordLengthExceededException if <code>bypassLengthLimitCheck</code> is <code>
   *     false</code> and the length of <code>passwordPlaintext</code> (in code points) exceeds
   *     <code>PASSWORD_LENGTH_LIMIT</code>
  public BigInteger iterationCount(String passwordPlaintext, boolean bypassLengthLimitCheck)
      throws MaximumPasswordLengthExceededException {
    // NOTE(review): this excerpt appears truncated -- closing braces, part of the
    // partialSumMultiplier expression, and the bodies of the last two ifs in the final loop
    // are not visible; confirm the details below against the full source.
    // A null or empty password yields zero.
    if (null == passwordPlaintext || passwordPlaintext.length() < 1) {
      return new BigInteger("0");

    // Length is measured in code points, so a surrogate pair counts as one character.
    int passwordLength = Character.codePointCount(passwordPlaintext, 0, passwordPlaintext.length());
    // The limit is enforced only when the caller did not ask to bypass it.
    if (!bypassLengthLimitCheck && passwordLength > PASSWORD_LENGTH_LIMIT) {
      throw new MaximumPasswordLengthExceededException();

    PasswordCharacterRange range = new PasswordCharacterRange(passwordPlaintext);
    BigInteger rangeSize = new BigInteger(Long.toString(range.size()));

    // determine number of iterations required for brute force attack
    // within this character range
    BigInteger result;

    // rangeSize^(passwordLength - 1) - 1
    BigInteger partialSumInner = rangeSize.pow(passwordLength - 1).subtract(new BigInteger("1"));

    BigDecimal partialSumMultiplier = new BigDecimal(range.size());
    partialSumMultiplier =
            partialSumMultiplier.subtract(new BigDecimal("1")),
    BigDecimal partialSumResult = partialSumMultiplier.multiply(new BigDecimal(partialSumInner));
    // Round to a whole number; toBigIntegerExact would throw if a fractional part remained.
    result = partialSumResult.setScale(0, RoundingMode.HALF_UP).toBigIntegerExact();

    // Optional cross-check: recompute the sum of rangeSize^i for i in [1, passwordLength)
    // term by term and compare it with the closed-form result.
    if (verifyPartialSumResult) {
      BigInteger slowResult = new BigInteger("0");
      for (int i = 1; i < passwordLength; i++) {
        BigInteger iteration = rangeSize.pow(i);
        slowResult = slowResult.add(iteration);

      boolean resultsMatch = result.compareTo(slowResult) == 0;
      if (!resultsMatch) {
        throw new RuntimeException("Values didn't match on password with length " + passwordLength);

    // i is a 1-based char index into the password; supplementalCharCount is subtracted so
    // that power is computed in code-point positions rather than char positions.
    for (int i = 1, supplementalCharCount = 0; i <= passwordPlaintext.length(); i++) {
      int power = passwordLength - (i - supplementalCharCount);
      int codePoint = passwordPlaintext.codePointAt(i - 1);
      long placeValue = range.position(codePoint);

      if (Character.isSupplementaryCodePoint(codePoint)) {
        // skip low-surrogate code unit

      if (power == 0 && placeValue == 0) {

      // Contribution of this character: placeValue * rangeSize^power.
      BigInteger multiplier = rangeSize.pow(power);
      BigInteger iteration = new BigInteger(Long.toString(placeValue)).multiply(multiplier);
      result = result.add(iteration);

    return result.add(new BigInteger("1"));
예제 #14
  * Escape control characters in a string and append them to the string buffer
  * @param string String to be written
  * @param sb String builder
  * @throws CharConversionException Invalid Unicode character
 private static void escapeString(String string, StringBuilder sb) throws CharConversionException {
   if (string.length() == 0) {
   // Find the next special character in the string
   int start = 0;
   Matcher matcher = pattern.matcher(string);
   while (matcher.find(start)) {
     int pos = matcher.start();
     if (pos > start) {
       sb.append(string.substring(start, pos));
     start = pos + 1;
     // Check for a valid Unicode codepoint
     int ch = string.codePointAt(pos);
     if (!Character.isValidCodePoint(ch)) {
       throw new CharConversionException("Invalid Unicode character in JSON string value");
     // Process a supplementary codepoint
     if (Character.isSupplementaryCodePoint(ch)) {
     // Escape control characters
     char c = string.charAt(pos);
     switch (c) {
       case '"':
       case '\\':
       case '\b':
       case '\f':
       case '\n':
       case '\r':
       case '\t':
       case '/':
         if ((c >= '\u0000' && c <= '\u001F')
             || (c >= '\u007F' && c <= '\u009F')
             || (c >= '\u2000' && c <= '\u20FF')) {
           sb.append("\\u").append(String.format("%04X", (int) c));
         } else {
   // Append the remainder of the string
   if (start == 0) {
   } else if (start < string.length()) {