@Test
 public void verifyNullByteArray() throws Exception {
   // Decoding a null byte array must yield the empty string for every registered charset.
   // NOTE(review): this pins "" (not null) as the result for null input -- confirm that this is
   // the intended contract of CharsetUtil.decode.
   for (Map.Entry<String, Charset> entry : CharsetUtil.getCharsetMap().entrySet()) {
     Assert.assertEquals(
         "Charset " + entry.getKey() + " impl bad -- did not return empty string for null input",
         "",
         CharsetUtil.decode(null, entry.getValue()));
   }
 }
 /**
  * Checks, for every registered charset, that decoding leaves the caller's byte array untouched.
  */
 @Test
 public void verifyDecodeDoesNotChangeByteArray() throws Exception {
   for (Map.Entry<String, Charset> charsetEntry : CharsetUtil.getCharsetMap().entrySet()) {
     byte[] input = {(byte) 0x40, (byte) 0x5F, (byte) 0x24, (byte) 0x78, (byte) 0x02};
     byte[] snapshot = input.clone();
     // the decoded text is irrelevant here; only a possible side effect on the input matters
     CharsetUtil.decode(input, charsetEntry.getValue());
     Assert.assertArrayEquals(
         "Charset " + charsetEntry.getKey() + " impl bad -- modified byte array parameter",
         snapshot,
         input);
   }
 }
  /**
   * Exercises CharsetUtil.normalize with text that every registered charset is expected to keep
   * unchanged, then with two characters that only some charsets keep (the others are expected to
   * turn them into '?').
   */
  @Test
  public void normalize() throws Exception {
    // letters, digits and a few symbols: expected to come back unchanged from every charset
    final String common = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefjhijklmnopqrstuvwxyz01234567890?&@";
    for (Map.Entry<String, Charset> candidate : CharsetUtil.getCharsetMap().entrySet()) {
      Assert.assertEquals(
          "Charset " + candidate.getKey() + " implementation broken",
          common,
          CharsetUtil.normalize(common, candidate.getValue()));
    }

    // euro currency sign: kept by everything checked here except ISO-8859-1
    final String euro = "\u20AC";
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_GSM));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_PACKED_GSM));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_AIRWIDE_GSM));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_VFD2_GSM));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_VFTR_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_ISO_8859_1));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_ISO_8859_15));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_UCS_2));
    Assert.assertEquals("\u20AC", CharsetUtil.normalize(euro, CharsetUtil.CHARSET_UTF_8));

    // U+6025 (a CJK ideograph): kept only by the two Unicode charsets checked here
    final String ideograph = "\u6025";
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_PACKED_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_AIRWIDE_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_VFD2_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_VFTR_GSM));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_ISO_8859_1));
    Assert.assertEquals("?", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_ISO_8859_15));
    Assert.assertEquals("\u6025", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_UCS_2));
    Assert.assertEquals("\u6025", CharsetUtil.normalize(ideograph, CharsetUtil.CHARSET_UTF_8));
  }
  /**
   * Pins the exact bytes CharsetUtil.encode produces for several sample strings across the
   * supported charsets. For UCS-2, UTF-8, ISO-8859-1 and ISO-8859-15 the result is additionally
   * cross-checked against String.getBytes with the matching JDK charset name.
   */
  @Test
  public void encode() throws Exception {
    // holds a result only when it is compared against more than one expectation
    byte[] encoded;

    // --- euro currency symbol ---
    final String euro = "\u20ac";

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B65"), CharsetUtil.encode(euro, CharsetUtil.CHARSET_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("9B32"), CharsetUtil.encode(euro, CharsetUtil.CHARSET_PACKED_GSM));

    encoded = CharsetUtil.encode(euro, CharsetUtil.CHARSET_UCS_2);
    Assert.assertArrayEquals(HexUtil.toByteArray("20AC"), encoded);
    Assert.assertArrayEquals(euro.getBytes("UTF-16BE"), encoded);

    encoded = CharsetUtil.encode(euro, CharsetUtil.CHARSET_UTF_8);
    Assert.assertArrayEquals(HexUtil.toByteArray("E282AC"), encoded);
    Assert.assertArrayEquals(euro.getBytes("UTF-8"), encoded);

    // latin-1 doesn't contain the euro symbol - replaced with '?'
    encoded = CharsetUtil.encode(euro, CharsetUtil.CHARSET_ISO_8859_1);
    Assert.assertArrayEquals(HexUtil.toByteArray("3F"), encoded);
    Assert.assertArrayEquals(euro.getBytes("ISO-8859-1"), encoded);

    // latin-9 does contain the euro symbol
    encoded = CharsetUtil.encode(euro, CharsetUtil.CHARSET_ISO_8859_15);
    Assert.assertArrayEquals(HexUtil.toByteArray("A4"), encoded);
    Assert.assertArrayEquals(euro.getBytes("ISO-8859-15"), encoded);

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B65"), CharsetUtil.encode(euro, CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B65"), CharsetUtil.encode(euro, CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B65"), CharsetUtil.encode(euro, CharsetUtil.CHARSET_VFTR_GSM));

    // --- longer string with an @ in the middle ---
    // the expected bytes below show '@' as 0x00 for GSM / AIRWIDE_IA5 and as 0x40 elsewhere
    final String helloAtWorld = "Hello @ World";

    Assert.assertArrayEquals(
        HexUtil.toByteArray("48656C6C6F200020576F726C64"),
        CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("C8329BFD060140D7B79C4D06"),
        CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_PACKED_GSM));

    encoded = CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_UCS_2);
    Assert.assertArrayEquals(
        HexUtil.toByteArray("00480065006C006C006F0020004000200057006F0072006C0064"), encoded);
    Assert.assertArrayEquals(helloAtWorld.getBytes("UTF-16BE"), encoded);

    encoded = CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_UTF_8);
    Assert.assertArrayEquals(HexUtil.toByteArray("48656C6C6F204020576F726C64"), encoded);
    Assert.assertArrayEquals(helloAtWorld.getBytes("UTF-8"), encoded);

    encoded = CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_ISO_8859_1);
    Assert.assertArrayEquals(HexUtil.toByteArray("48656C6C6F204020576F726C64"), encoded);
    Assert.assertArrayEquals(helloAtWorld.getBytes("ISO-8859-1"), encoded);

    encoded = CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_ISO_8859_15);
    Assert.assertArrayEquals(HexUtil.toByteArray("48656C6C6F204020576F726C64"), encoded);
    Assert.assertArrayEquals(helloAtWorld.getBytes("ISO-8859-15"), encoded);

    Assert.assertArrayEquals(
        HexUtil.toByteArray("48656C6C6F200020576F726C64"),
        CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("48656C6C6F204020576F726C64"),
        CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("48656C6C6F204020576F726C64"),
        CharsetUtil.encode(helloAtWorld, CharsetUtil.CHARSET_VFTR_GSM));

    // --- plain letters only: every single-byte charset below expects the same bytes ---
    final String lettersOnly = "JoeyBlue";

    Assert.assertArrayEquals(
        HexUtil.toByteArray("4A6F6579426C7565"),
        CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("CA77392F64D7CB"),
        CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_PACKED_GSM));

    encoded = CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_UCS_2);
    Assert.assertArrayEquals(HexUtil.toByteArray("004A006F006500790042006C00750065"), encoded);
    Assert.assertArrayEquals(lettersOnly.getBytes("UTF-16BE"), encoded);

    encoded = CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_UTF_8);
    Assert.assertArrayEquals(HexUtil.toByteArray("4A6F6579426C7565"), encoded);
    Assert.assertArrayEquals(lettersOnly.getBytes("UTF-8"), encoded);

    encoded = CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_ISO_8859_1);
    Assert.assertArrayEquals(HexUtil.toByteArray("4A6F6579426C7565"), encoded);
    Assert.assertArrayEquals(lettersOnly.getBytes("ISO-8859-1"), encoded);

    encoded = CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_ISO_8859_15);
    Assert.assertArrayEquals(HexUtil.toByteArray("4A6F6579426C7565"), encoded);
    Assert.assertArrayEquals(lettersOnly.getBytes("ISO-8859-15"), encoded);

    Assert.assertArrayEquals(
        HexUtil.toByteArray("4A6F6579426C7565"),
        CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("4A6F6579426C7565"),
        CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("4A6F6579426C7565"),
        CharsetUtil.encode(lettersOnly, CharsetUtil.CHARSET_VFTR_GSM));

    // --- brackets and braces (0x1B-prefixed in the GSM-family expectations) plus '$' ---
    // the expected bytes show '$' as 0x02 for GSM / AIRWIDE_IA5 and as 0x24 for VFD2 / VFTR
    final String bracketsAndDollar = "{}[]$";

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B281B291B3C1B3E02"),
        CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1BD426B5E16D7C02"),
        CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_PACKED_GSM));

    encoded = CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_UCS_2);
    Assert.assertArrayEquals(HexUtil.toByteArray("007B007D005B005D0024"), encoded);
    Assert.assertArrayEquals(bracketsAndDollar.getBytes("UTF-16BE"), encoded);

    encoded = CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_UTF_8);
    Assert.assertArrayEquals(HexUtil.toByteArray("7B7D5B5D24"), encoded);
    Assert.assertArrayEquals(bracketsAndDollar.getBytes("UTF-8"), encoded);

    encoded = CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_ISO_8859_1);
    Assert.assertArrayEquals(HexUtil.toByteArray("7B7D5B5D24"), encoded);
    Assert.assertArrayEquals(bracketsAndDollar.getBytes("ISO-8859-1"), encoded);

    encoded = CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_ISO_8859_15);
    Assert.assertArrayEquals(HexUtil.toByteArray("7B7D5B5D24"), encoded);
    Assert.assertArrayEquals(bracketsAndDollar.getBytes("ISO-8859-15"), encoded);

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B281B291B3C1B3E02"),
        CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B281B291B3C1B3E24"),
        CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertArrayEquals(
        HexUtil.toByteArray("1B281B291B3C1B3E24"),
        CharsetUtil.encode(bracketsAndDollar, CharsetUtil.CHARSET_VFTR_GSM));

    // --- chars specific to vodafone-turkey ---
    // rendered: $@£¤¥§ÄÅßñΓΔΘΩ€ (written with unicode escapes in the literal)
    final String vodafoneTurkey =
        "$@\u00a3\u00a4\u00a5\u00a7\u00c4\u00c5\u00df\u00f1\u0393\u0394\u0398\u03a9\u20ac";
    Assert.assertArrayEquals(
        HexUtil.toByteArray("2440A3A4A5A7C4C5DFF1137F19151B65"),
        CharsetUtil.encode(vodafoneTurkey, CharsetUtil.CHARSET_VFTR_GSM));
  }
  /**
   * Pins the text CharsetUtil.decode produces for fixed byte sequences (given as hex) across the
   * supported charsets, using the same sample strings for each charset in turn.
   */
  @Test
  public void decode() throws Exception {
    // --- euro currency symbol ---
    final String euro = "\u20ac";

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("1B65"), CharsetUtil.CHARSET_GSM));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("9B32"), CharsetUtil.CHARSET_PACKED_GSM));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("20AC"), CharsetUtil.CHARSET_UCS_2));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("E282AC"), CharsetUtil.CHARSET_UTF_8));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("1B65"), CharsetUtil.CHARSET_AIRWIDE_IA5));

    // no ISO-8859-1 case for the euro: latin-1 doesn't contain the euro symbol, so there is no
    // byte sequence that decodes to it

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("A4"), CharsetUtil.CHARSET_ISO_8859_15));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("1B65"), CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertEquals(
        euro, CharsetUtil.decode(HexUtil.toByteArray("1B65"), CharsetUtil.CHARSET_VFTR_GSM));

    // --- longer string with an @ in the middle ---
    final String helloAtWorld = "Hello @ World";

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F200020576F726C64"), CharsetUtil.CHARSET_GSM));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("C8329BFD060140D7B79C4D06"), CharsetUtil.CHARSET_PACKED_GSM));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("00480065006C006C006F0020004000200057006F0072006C0064"),
            CharsetUtil.CHARSET_UCS_2));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_UTF_8));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_ISO_8859_1));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_ISO_8859_15));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertEquals(
        helloAtWorld,
        CharsetUtil.decode(
            HexUtil.toByteArray("48656C6C6F204020576F726C64"), CharsetUtil.CHARSET_VFTR_GSM));

    // --- plain letters only ---
    final String lettersOnly = "JoeyBlue";

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_GSM));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(HexUtil.toByteArray("CA77392F64D7CB"), CharsetUtil.CHARSET_PACKED_GSM));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(
            HexUtil.toByteArray("004A006F006500790042006C00750065"), CharsetUtil.CHARSET_UCS_2));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_UTF_8));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(
            HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_ISO_8859_1));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(
            HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_ISO_8859_15));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(
            HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertEquals(
        lettersOnly,
        CharsetUtil.decode(HexUtil.toByteArray("4A6F6579426C7565"), CharsetUtil.CHARSET_VFTR_GSM));

    // --- brackets and braces (0x1B-prefixed in the GSM-family inputs) plus '$' ---
    final String bracketsAndDollar = "{}[]$";

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(HexUtil.toByteArray("1B281B291B3C1B3E02"), CharsetUtil.CHARSET_GSM));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(
            HexUtil.toByteArray("1BD426B5E16D7C02"), CharsetUtil.CHARSET_PACKED_GSM));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(
            HexUtil.toByteArray("007B007D005B005D0024"), CharsetUtil.CHARSET_UCS_2));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(HexUtil.toByteArray("7B7D5B5D24"), CharsetUtil.CHARSET_UTF_8));

    // airwide is close to GSM, $ is 0x24 rather than 0x02 though
    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(
            HexUtil.toByteArray("1B281B291B3C1B3E24"), CharsetUtil.CHARSET_AIRWIDE_IA5));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(HexUtil.toByteArray("7B7D5B5D24"), CharsetUtil.CHARSET_ISO_8859_1));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(HexUtil.toByteArray("7B7D5B5D24"), CharsetUtil.CHARSET_ISO_8859_15));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(
            HexUtil.toByteArray("1B281B291B3C1B3E24"), CharsetUtil.CHARSET_VFD2_GSM));

    Assert.assertEquals(
        bracketsAndDollar,
        CharsetUtil.decode(
            HexUtil.toByteArray("1B281B291B3C1B3E24"), CharsetUtil.CHARSET_VFTR_GSM));

    // These tests once passed on mac os x but failed on linux because of a byte-encoding
    // difference between the platforms, so the expected strings below are written with Java
    // unicode escapes instead of literal characters.
    // helpful URL: http://www.greywyvern.com/code/php/utf8_html

    // VFD2-GSM sample; rendered: @$ߤ¡Ñܧñü_
    final String vfd2Sample = "@$\u00df\u00a4\u00a1\u00d1\u00dc\u00a7\u00f1\u00fc_";
    Assert.assertEquals(
        vfd2Sample,
        CharsetUtil.decode(
            HexUtil.toByteArray("40247E02A15F5D5E1E7D11"), CharsetUtil.CHARSET_VFD2_GSM));

    // AIRWIDE-IA5 sample; rendered: @$ß$@ÑÜ_ñü_
    final String airwideSample = "@$\u00df$@\u00d1\u00dc_\u00f1\u00fc_";
    Assert.assertEquals(
        airwideSample,
        CharsetUtil.decode(
            HexUtil.toByteArray("40241E24405D5E5F7D7E5F"), CharsetUtil.CHARSET_AIRWIDE_IA5));

    // chars specific to vodafone-turkey; rendered: $@£¤¥§ÄÅßñΓΔΘΩ€
    final String vodafoneTurkey =
        "$@\u00a3\u00a4\u00a5\u00a7\u00c4\u00c5\u00df\u00f1\u0393\u0394\u0398\u03a9\u20ac";
    Assert.assertEquals(
        vodafoneTurkey,
        CharsetUtil.decode(
            HexUtil.toByteArray("2440A3A4A5A7C4C5DFF1137F19151B65"), CharsetUtil.CHARSET_VFTR_GSM));
  }