/** * Visit all bytes of the given utf8 string calling the visitor when a character is decoded. * * <p>The acceptable input formats are controlled by the STRICTLY_CHECK_FORMAT, ALLOW_NORMAL_UTF8, * and ALLOW_PSEUDO_UTF8 flags. * * @param utf8 (pseudo-)utf8 byte array * @param visitor called when characters are decoded * @throws UTFDataFormatException if the (pseudo-)utf8 byte array is not valid (pseudo-)utf8 */ @Inline private static void visitUTF8(ByteBuffer utf8, UTF8CharacterVisitor visitor) throws UTFDataFormatException { while (utf8.hasRemaining()) { byte b = utf8.get(); if (STRICTLY_CHECK_FORMAT && !ALLOW_NORMAL_UTF8) { if (b == 0) { throwDataFormatException("0 byte encountered", utf8.position() - 1); } } if (b >= 0) { // < 0x80 unsigned // in the range '\001' to '\177' visitor.visit_char((char) b); continue; } try { byte nb = utf8.get(); if (b < -32) { // < 0xe0 unsigned // '\000' or in the range '\200' to '\u07FF' char c = (char) (((b & 0x1f) << 6) | (nb & 0x3f)); visitor.visit_char(c); if (STRICTLY_CHECK_FORMAT) { if (((b & 0xe0) != 0xc0) || ((nb & 0xc0) != 0x80)) { throwDataFormatException( "invalid marker bits for double byte char", utf8.position() - 2); } if (c < '\200') { if (!ALLOW_PSEUDO_UTF8 || (c != '\000')) { throwDataFormatException( "encountered double byte char that should have been single byte", utf8.position() - 2); } } else if (c > '\u07FF') { throwDataFormatException( "encountered double byte char that should have been single byte", utf8.position() - 2); } } } else { byte nnb = utf8.get(); // in the range '\u0800' to '\uFFFF' char c = (char) (((b & 0x0f) << 12) | ((nb & 0x3f) << 6) | (nnb & 0x3f)); visitor.visit_char(c); if (STRICTLY_CHECK_FORMAT) { if (((b & 0xf0) != 0xe0) || ((nb & 0xc0) != 0x80) || ((nnb & 0xc0) != 0x80)) { throwDataFormatException( "invalid marker bits for triple byte char", utf8.position() - 3); } if (c < '\u0800') { throwDataFormatException( "encountered triple byte char that should have been fewer bytes", 
utf8.position() - 3); } } } } catch (ArrayIndexOutOfBoundsException e) { throwDataFormatException("unexpected end", utf8.position()); } } }
/**
 * Decode the remaining (pseudo-)utf8 bytes of a buffer into a String.
 *
 * <p>Which input formats are accepted is governed by the STRICTLY_CHECK_FORMAT,
 * ALLOW_NORMAL_UTF8, and ALLOW_PSEUDO_UTF8 flags.
 *
 * @param utf8 buffer holding the (pseudo-)utf8 bytes to decode
 * @return unicode string
 * @throws UTFDataFormatException if the buffer content is not valid (pseudo-)utf8
 */
public static String fromUTF8(ByteBuffer utf8) throws UTFDataFormatException {
  // The visitor is sized from the number of bytes still to be read.
  final int byteCount = utf8.remaining();
  final UTF8CharacterVisitor decoder = new ByteBufferStringEncoderVisitor(byteCount);
  visitUTF8(utf8, decoder);
  return decoder.toString();
}
/**
 * Decode a (pseudo-)utf8 byte array into a String.
 *
 * <p>Which input formats are accepted is governed by the STRICTLY_CHECK_FORMAT,
 * ALLOW_NORMAL_UTF8, and ALLOW_PSEUDO_UTF8 flags.
 *
 * @param utf8 (pseudo-)utf8 byte array
 * @return unicode string
 * @throws UTFDataFormatException if the byte array is not valid (pseudo-)utf8
 */
public static String fromUTF8(byte[] utf8) throws UTFDataFormatException {
  // The visitor is sized from the length of the input array.
  final int byteCount = utf8.length;
  final UTF8CharacterVisitor decoder = new ByteArrayStringEncoderVisitor(byteCount);
  visitUTF8(utf8, decoder);
  return decoder.toString();
}