/**
 * Ad-hoc micro-benchmark comparing four ways of turning the same UTF-8 byte array into a
 * {@link String}: the JDK {@code String} constructor, {@code StringUtil.toUTF8String}, a freshly
 * allocated {@code Utf8StringBuffer} per iteration, and a single reused {@code Utf8StringBuffer}.
 *
 * <p>Each of the ten rounds prints the four elapsed times in milliseconds to {@code System.err}.
 * The hash codes of all decoded strings are summed into {@code calc} and printed at the end
 * (presumably so the decoding work cannot be optimised away -- the code itself only shows that
 * the sum is printed).
 *
 * @param arg command-line arguments; not read
 * @throws Exception declared by the original signature; nothing in this body throws a checked
 *     exception explicitly
 */
public static void main(String[] arg) throws Exception {
    final int rounds = 10;
    final int iterations = 1000000;

    String string = "Now \u0690xxxxxxxx";
    System.err.println(string);
    byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
    // The bytes are UTF-8, so decode them as UTF-8; the charset-less String constructor would use
    // the platform default charset and garble the non-ASCII character on non-UTF-8 platforms.
    System.err.println(new String(bytes, StandardCharsets.UTF_8));
    System.err.println(bytes.length);

    long calc = 0;
    Utf8StringBuffer strbuf = new Utf8StringBuffer(bytes.length);
    for (int i = 0; i < rounds; i++) {
        // 1) JDK decoder.
        long s1 = System.currentTimeMillis();
        for (int j = iterations; j-- > 0; ) {
            calc += new String(bytes, 0, bytes.length, StandardCharsets.UTF_8).hashCode();
        }

        // 2) StringUtil helper.
        long s2 = System.currentTimeMillis();
        for (int j = iterations; j-- > 0; ) {
            calc += StringUtil.toUTF8String(bytes, 0, bytes.length).hashCode();
        }

        // 3) New Utf8StringBuffer for every decode.
        long s3 = System.currentTimeMillis();
        for (int j = iterations; j-- > 0; ) {
            Utf8StringBuffer buffer = new Utf8StringBuffer(bytes.length);
            buffer.append(bytes, 0, bytes.length);
            calc += buffer.toString().hashCode();
        }

        // 4) One Utf8StringBuffer, reset before every decode.
        long s4 = System.currentTimeMillis();
        for (int j = iterations; j-- > 0; ) {
            strbuf.reset();
            strbuf.append(bytes, 0, bytes.length);
            calc += strbuf.toString().hashCode();
        }
        long s5 = System.currentTimeMillis();

        System.err.println((s2 - s1) + ", " + (s3 - s2) + ", " + (s4 - s3) + ", " + (s5 - s4));
    }
    System.err.println(calc);
}
/**
 * Encodes a string containing ASCII, control characters, NUL and several multi-byte characters,
 * feeds the encoded bytes into a {@code Utf8StringBuffer} one byte at a time, and checks that the
 * buffer reproduces the original text.
 */
@Test
public void testUtfStringBuffer() throws Exception {
    final String expected = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
    final byte[] encoded = expected.getBytes(StringUtil.__UTF8);

    Utf8StringBuffer decoder = new Utf8StringBuffer();
    for (int idx = 0; idx < encoded.length; idx++) {
        decoder.append(encoded[idx]);
    }

    assertEquals(expected, decoder.toString());
    // The trailing ASCII word must survive after the multi-byte characters before it.
    assertTrue(decoder.toString().endsWith("jetty"));
}
/**
 * Appends every byte of the encoded text except the last one, leaving the final multi-byte
 * character incomplete, then calls {@code toString()}. The test passes only if an
 * {@link IllegalArgumentException} is thrown.
 */
@Test(expected = IllegalArgumentException.class)
public void testUtf8WithMissingByte() throws Exception {
    final String text = "abc\u10fb";
    final byte[] encoded = text.getBytes(StringUtil.__UTF8);
    final int truncatedLength = encoded.length - 1;

    Utf8StringBuffer decoder = new Utf8StringBuffer();
    int idx = 0;
    while (idx < truncatedLength) {
        decoder.append(encoded[idx]);
        idx++;
    }

    decoder.toString();
}
/**
 * Round-trips a single character that is written as a surrogate pair: first through the JDK's
 * own UTF-8 decoder as a sanity check, then through {@code Utf8StringBuffer} using the bulk
 * {@code append(byte[], int, int)} call.
 */
@Test
public void testUTF32codes() throws Exception {
    final String expected = "\uD842\uDF9F";
    final byte[] encoded = expected.getBytes("UTF-8");

    // Sanity check: the JVM itself must round-trip the surrogate pair.
    final String viaJvm = new String(encoded, 0, encoded.length, "UTF-8");
    assertEquals(expected, viaJvm);

    Utf8StringBuffer decoder = new Utf8StringBuffer();
    decoder.append(encoded, 0, encoded.length);
    final String viaBuffer = decoder.toString();
    assertEquals(expected, viaBuffer);
}
/**
 * Feeds three two-byte sequences (0xC3 0xBC, 0xC3 0xB6, 0xC3 0xA4) into a
 * {@code Utf8StringBuffer} byte by byte and expects the characters U+00FC, U+00F6 and U+00E4.
 */
@Test
public void testGermanUmlauts() throws Exception {
    final byte[] encoded = {
        (byte) 0xC3, (byte) 0xBC,
        (byte) 0xC3, (byte) 0xB6,
        (byte) 0xC3, (byte) 0xA4
    };

    Utf8StringBuffer decoder = new Utf8StringBuffer();
    for (byte current : encoded) {
        decoder.append(current);
    }

    assertEquals("\u00FC\u00F6\u00E4", decoder.toString());
}
/**
 * Decode String with % encoding. This method makes the assumption that the majority of calls will
 * need no decoding, so no buffer is allocated until the first character that needs handling is
 * met.
 *
 * <p>Within the window {@code [offset, offset + length)} of {@code encoded}:
 *
 * <ul>
 *   <li>{@code '+'} is replaced by a space;
 *   <li>{@code '%'} followed by at least two more characters inside the window is passed to
 *       {@code TypeUtil.parseInt(encoded, pos, 2, 16)} (presumably a two-digit hex parse) and the
 *       result is treated as one encoded byte; if that call throws
 *       {@link NumberFormatException} the {@code '%'} is kept literally;
 *   <li>a {@code '%'} with fewer than two following characters in the window is kept literally;
 *   <li>every other character is copied unchanged.
 * </ul>
 *
 * <p>When {@code charset} is {@code null} or {@code StringUtil.isUTF8(charset)} is true, decoded
 * bytes are fed to a {@code Utf8StringBuffer}; otherwise runs of decoded bytes are converted with
 * {@code new String(bytes, 0, n, charset)}.
 *
 * @param encoded the string containing the text to decode
 * @param offset index in {@code encoded} of the first character to decode
 * @param length number of characters to decode
 * @param charset name of the charset used for percent-encoded bytes; {@code null} selects the
 *     UTF-8 path
 * @return {@code encoded} itself when nothing needed decoding and the window covers the whole
 *     string, the plain substring when nothing needed decoding, otherwise the decoded text
 * @throws RuntimeException wrapping {@link UnsupportedEncodingException} if {@code charset} is
 *     not supported when a run of bytes has to be converted
 */
public static String decodeString(String encoded, int offset, int length, String charset) {
    if (charset == null || StringUtil.isUTF8(charset)) {
        Utf8StringBuffer buffer = null;

        for (int i = 0; i < length; i++) {
            char c = encoded.charAt(offset + i);
            if (c > 0xff) {
                // Character outside the single-byte range: copied as-is, but it forces a buffer.
                if (buffer == null) {
                    buffer = new Utf8StringBuffer(length);
                    buffer.getStringBuffer().append(encoded, offset, offset + i + 1);
                } else {
                    buffer.getStringBuffer().append(c);
                }
            } else if (c == '+') {
                if (buffer == null) {
                    buffer = new Utf8StringBuffer(length);
                    buffer.getStringBuffer().append(encoded, offset, offset + i);
                }
                buffer.getStringBuffer().append(' ');
            } else if (c == '%' && (i + 2) < length) {
                if (buffer == null) {
                    buffer = new Utf8StringBuffer(length);
                    buffer.getStringBuffer().append(encoded, offset, offset + i);
                }
                try {
                    byte b = (byte) TypeUtil.parseInt(encoded, offset + i + 1, 2, 16);
                    buffer.append(b);
                    i += 2;
                } catch (NumberFormatException nfe) {
                    // Not a valid escape: keep the '%' and let the following characters be
                    // processed normally by the next loop iterations.
                    buffer.getStringBuffer().append('%');
                }
            } else if (buffer != null) {
                buffer.getStringBuffer().append(c);
            }
        }

        if (buffer == null) {
            if (offset == 0 && encoded.length() == length) {
                return encoded;
            }
            return encoded.substring(offset, offset + length);
        }
        return buffer.toString();
    } else {
        // The builder never leaves this method, so the unsynchronised StringBuilder is sufficient.
        StringBuilder buffer = null;

        try {
            for (int i = 0; i < length; i++) {
                char c = encoded.charAt(offset + i);
                if (c > 0xff) {
                    if (buffer == null) {
                        buffer = new StringBuilder(length);
                        buffer.append(encoded, offset, offset + i + 1);
                    } else {
                        buffer.append(c);
                    }
                } else if (c == '+') {
                    if (buffer == null) {
                        buffer = new StringBuilder(length);
                        buffer.append(encoded, offset, offset + i);
                    }
                    buffer.append(' ');
                } else if (c == '%' && (i + 2) < length) {
                    if (buffer == null) {
                        buffer = new StringBuilder(length);
                        buffer.append(encoded, offset, offset + i);
                    }

                    // Collect the whole run of single-byte characters starting at this '%' into
                    // a byte array, then convert the run with the requested charset in one go.
                    byte[] ba = new byte[length];
                    int n = 0;
                    while (c <= 0xff) {
                        if (c == '%') {
                            if (i + 2 < length) {
                                try {
                                    byte decoded =
                                        (byte) TypeUtil.parseInt(encoded, offset + i + 1, 2, 16);
                                    ba[n++] = decoded;
                                    i += 3;
                                } catch (NumberFormatException nfe) {
                                    // Invalid escape: keep the '%' literally, then copy what
                                    // follows up to the next '%'. The scan is bounded by the
                                    // window so malformed input without another '%' cannot read
                                    // past offset + length or overflow ba.
                                    ba[n++] = (byte) '%';
                                    while (++i < length) {
                                        char next = encoded.charAt(offset + i);
                                        if (next == '%') {
                                            break;
                                        }
                                        ba[n++] = (byte) (next == '+' ? ' ' : next);
                                    }
                                }
                            } else {
                                // Too close to the end of the window to be an escape.
                                ba[n++] = (byte) '%';
                                i++;
                            }
                        } else if (c == '+') {
                            ba[n++] = (byte) ' ';
                            i++;
                        } else {
                            ba[n++] = (byte) c;
                            i++;
                        }

                        if (i >= length) {
                            break;
                        }
                        c = encoded.charAt(offset + i);
                    }

                    // i now points at the first unconsumed character; step back one because the
                    // enclosing for loop increments it again.
                    i--;
                    buffer.append(new String(ba, 0, n, charset));
                } else if (buffer != null) {
                    buffer.append(c);
                }
            }

            if (buffer == null) {
                if (offset == 0 && encoded.length() == length) {
                    return encoded;
                }
                return encoded.substring(offset, offset + length);
            }
            return buffer.toString();
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }
}