@Test(expected = IllegalArgumentException.class)
 public void testUtf8WithMissingByte() throws Exception {
   // U+10FB needs three UTF-8 bytes; the last byte of the encoding is withheld below.
   final String text = "abc\u10fb";
   final byte[] encoded = text.getBytes(StringUtil.__UTF8);
   final Utf8StringBuffer utf8 = new Utf8StringBuffer();
   final int truncatedLength = encoded.length - 1;
   int index = 0;
   while (index < truncatedLength) {
     utf8.append(encoded[index]);
     index++;
   }
   // The test only passes if an IllegalArgumentException is raised for the incomplete sequence.
   utf8.toString();
 }
 @Test
 public void testUtfStringBuffer() throws Exception {
   // Mix of ASCII, control characters, and multi-byte characters, ending in plain ASCII.
   final String expected = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
   final byte[] encoded = expected.getBytes(StringUtil.__UTF8);
   final Utf8StringBuffer utf8 = new Utf8StringBuffer();
   // Feed the encoding one byte at a time.
   for (int index = 0; index < encoded.length; index++) {
     utf8.append(encoded[index]);
   }
   assertEquals(expected, utf8.toString());
   assertTrue(utf8.toString().endsWith("jetty"));
 }
  @Test(expected = Utf8Appendable.NotUtf8Exception.class)
  public void testUtf8WithAdditionalByte() throws Exception {
    final byte[] corrupted = "abcXX".getBytes(StringUtil.__UTF8);
    // Replace the two placeholder 'X' bytes with the pair 0xC0 0x00.
    corrupted[3] = (byte) 0xc0;
    corrupted[4] = (byte) 0x00;

    final Utf8StringBuffer utf8 = new Utf8StringBuffer();
    // Appending byte-by-byte must end in NotUtf8Exception for the test to pass.
    for (int index = 0; index < corrupted.length; index++) {
      utf8.append(corrupted[index]);
    }
  }
  @Test
  public void testUTF32codes() throws Exception {
    // A surrogate pair, i.e. one code point outside the Basic Multilingual Plane.
    final String expected = "\uD842\uDF9F";
    final byte[] encoded = expected.getBytes("UTF-8");

    // Sanity check: the JVM's own decoder must round-trip the bytes.
    final String jvmDecoded = new String(encoded, 0, encoded.length, "UTF-8");
    assertEquals(expected, jvmDecoded);

    // The buffer under test must produce the same string from a bulk append.
    final Utf8StringBuffer utf8 = new Utf8StringBuffer();
    utf8.append(encoded, 0, encoded.length);
    assertEquals(expected, utf8.toString());
  }
  @Test
  public void testGermanUmlauts() throws Exception {
    // UTF-8 encodings of U+00FC, U+00F6 and U+00E4, two bytes each.
    final byte[] encoded = {
      (byte) 0xC3, (byte) 0xBC,
      (byte) 0xC3, (byte) 0xB6,
      (byte) 0xC3, (byte) 0xA4
    };

    final Utf8StringBuffer utf8 = new Utf8StringBuffer();
    for (byte current : encoded) {
      utf8.append(current);
    }

    assertEquals("\u00FC\u00F6\u00E4", utf8.toString());
  }
  /**
   * Ad-hoc timing harness that decodes the same UTF-8 byte array four different ways and prints the
   * elapsed milliseconds of each approach, once per round, to {@code System.err}.
   *
   * <p>Columns printed per round, in order: JDK {@code new String(..., UTF_8)}, {@code
   * StringUtil.toUTF8String}, a fresh {@code Utf8StringBuffer} per iteration, and a single reused
   * {@code Utf8StringBuffer} that is reset each iteration.
   *
   * @param arg ignored
   */
  public static void main(String[] arg) throws Exception {
    final int rounds = 10;
    final int iterations = 1000000;

    final String sample = "Now \u0690xxxxxxxx";
    System.err.println(sample);
    final byte[] utf8Bytes = sample.getBytes(StandardCharsets.UTF_8);
    // Deliberately decoded with the platform default charset, exactly as printed before.
    System.err.println(new String(utf8Bytes));
    System.err.println(utf8Bytes.length);

    // Accumulating the hash codes keeps the decoded strings "used"; the sum is printed at the end.
    long checksum = 0;
    final Utf8StringBuffer reused = new Utf8StringBuffer(utf8Bytes.length);

    for (int round = 0; round < rounds; round++) {
      final long jdkStart = System.currentTimeMillis();
      for (int n = 0; n < iterations; n++) {
        final String decoded = new String(utf8Bytes, 0, utf8Bytes.length, StandardCharsets.UTF_8);
        checksum += decoded.hashCode();
      }

      final long utilStart = System.currentTimeMillis();
      for (int n = 0; n < iterations; n++) {
        final String decoded = StringUtil.toUTF8String(utf8Bytes, 0, utf8Bytes.length);
        checksum += decoded.hashCode();
      }

      final long freshStart = System.currentTimeMillis();
      for (int n = 0; n < iterations; n++) {
        final Utf8StringBuffer fresh = new Utf8StringBuffer(utf8Bytes.length);
        fresh.append(utf8Bytes, 0, utf8Bytes.length);
        checksum += fresh.toString().hashCode();
      }

      final long reusedStart = System.currentTimeMillis();
      for (int n = 0; n < iterations; n++) {
        reused.reset();
        reused.append(utf8Bytes, 0, utf8Bytes.length);
        checksum += reused.toString().hashCode();
      }
      final long roundEnd = System.currentTimeMillis();

      final long jdkMillis = utilStart - jdkStart;
      final long utilMillis = freshStart - utilStart;
      final long freshMillis = reusedStart - freshStart;
      final long reusedMillis = roundEnd - reusedStart;
      System.err.println(jdkMillis + ", " + utilMillis + ", " + freshMillis + ", " + reusedMillis);
    }
    System.err.println(checksum);
  }
 @Test(expected = Utf8Appendable.NotUtf8Exception.class)
 public void testInvalidUTF8() throws UnsupportedEncodingException {
   final byte leadByte = (byte) 0xC2;
   final Utf8StringBuffer utf8 = new Utf8StringBuffer();
   // 0xC2 opens a two-byte sequence, so a second 0xC2 is not a valid continuation byte.
   utf8.append(leadByte);
   utf8.append(leadByte);
 }
Example #8
0
  /**
   * Decode String with % encoding. This method makes the assumption that the majority of calls will
   * need no decoding, so no buffer is allocated until the first character that requires one is seen.
   *
   * <p>Within the region {@code [offset, offset + length)}:
   *
   * <ul>
   *   <li>{@code '+'} is decoded to a space;
   *   <li>{@code %XX} (two hex digits) is decoded to a byte, and byte runs are converted to
   *       characters using {@code charset};
   *   <li>a {@code '%'} that is not followed by two parseable hex digits inside the region is kept
   *       as a literal {@code '%'};
   *   <li>characters above {@code 0xff} are copied through unchanged.
   * </ul>
   *
   * @param encoded the string containing the region to decode
   * @param offset index of the first character of the region
   * @param length number of characters in the region
   * @param charset name of the charset used to turn decoded bytes into characters; {@code null}, or
   *     any name for which {@code StringUtil.isUTF8} returns true, selects the {@code
   *     Utf8StringBuffer} based path
   * @return the decoded text; when nothing needed decoding this is {@code encoded} itself (if the
   *     region spans the whole string) or the plain substring
   * @throws RuntimeException wrapping an {@link UnsupportedEncodingException} if a percent-encoded
   *     run has to be converted and {@code charset} is not supported
   */
  public static String decodeString(String encoded, int offset, int length, String charset) {
    if (charset == null || StringUtil.isUTF8(charset)) {
      // Lazily created: stays null for as long as the input needs no decoding.
      Utf8StringBuffer buffer = null;

      for (int i = 0; i < length; i++) {
        char c = encoded.charAt(offset + i);
        // char is unsigned, so only the upper bound needs checking.
        if (c > 0xff) {
          if (buffer == null) {
            buffer = new Utf8StringBuffer(length);
            // Copy everything seen so far, including the current character.
            buffer.getStringBuffer().append(encoded, offset, offset + i + 1);
          } else buffer.getStringBuffer().append(c);
        } else if (c == '+') {
          if (buffer == null) {
            buffer = new Utf8StringBuffer(length);
            buffer.getStringBuffer().append(encoded, offset, offset + i);
          }

          buffer.getStringBuffer().append(' ');
        } else if (c == '%' && (i + 2) < length) {
          if (buffer == null) {
            buffer = new Utf8StringBuffer(length);
            buffer.getStringBuffer().append(encoded, offset, offset + i);
          }

          try {
            byte b = (byte) TypeUtil.parseInt(encoded, offset + i + 1, 2, 16);
            buffer.append(b);
            i += 2;
          } catch (NumberFormatException nfe) {
            // Malformed escape: keep the '%' literally. i is not advanced, so the following
            // characters are handled by the next loop iterations.
            buffer.getStringBuffer().append('%');
          }
        } else if (buffer != null) buffer.getStringBuffer().append(c);
      }

      if (buffer == null) {
        // Nothing needed decoding; avoid copying when the region is the whole string.
        if (offset == 0 && encoded.length() == length) return encoded;
        return encoded.substring(offset, offset + length);
      }

      return buffer.toString();
    } else {
      // Lazily created: stays null for as long as the input needs no decoding.
      StringBuffer buffer = null;

      try {
        for (int i = 0; i < length; i++) {
          char c = encoded.charAt(offset + i);
          // char is unsigned, so only the upper bound needs checking.
          if (c > 0xff) {
            if (buffer == null) {
              buffer = new StringBuffer(length);
              // Copy everything seen so far, including the current character.
              buffer.append(encoded, offset, offset + i + 1);
            } else buffer.append(c);
          } else if (c == '+') {
            if (buffer == null) {
              buffer = new StringBuffer(length);
              buffer.append(encoded, offset, offset + i);
            }

            buffer.append(' ');
          } else if (c == '%' && (i + 2) < length) {
            if (buffer == null) {
              buffer = new StringBuffer(length);
              buffer.append(encoded, offset, offset + i);
            }

            // Collect a run of single-byte characters (decoding %XX and '+') so the whole run can
            // be converted with the requested charset in one go. Every consumed character yields at
            // most one byte, so "length" bytes is always enough room.
            byte[] ba = new byte[length];
            int n = 0;
            while (c <= 0xff) {
              if (c == '%') {
                if (i + 2 < length) {
                  try {
                    byte decoded = (byte) TypeUtil.parseInt(encoded, offset + i + 1, 2, 16);
                    ba[n++] = decoded;
                    i += 3;
                  } catch (NumberFormatException nfe) {
                    // Malformed escape: keep the '%' literally and step past it only. The
                    // characters that follow are processed by this same loop, which bounds-checks
                    // against "length" and stops at characters above 0xff. (Scanning ahead for the
                    // next '%' here would run off the end of the input when there is none.)
                    ba[n++] = (byte) '%';
                    i++;
                  }
                } else {
                  // Too close to the end of the region for two hex digits: literal '%'.
                  ba[n++] = (byte) '%';
                  i++;
                }
              } else if (c == '+') {
                ba[n++] = (byte) ' ';
                i++;
              } else {
                ba[n++] = (byte) c;
                i++;
              }

              if (i >= length) break;
              c = encoded.charAt(offset + i);
            }

            // The enclosing for-loop increments i again, so step back onto the last consumed char.
            i--;
            buffer.append(new String(ba, 0, n, charset));

          } else if (buffer != null) buffer.append(c);
        }

        if (buffer == null) {
          // Nothing needed decoding; avoid copying when the region is the whole string.
          if (offset == 0 && encoded.length() == length) return encoded;
          return encoded.substring(offset, offset + length);
        }

        return buffer.toString();
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
      }
    }
  }