@Test public void verifyNullByteArray() throws Exception { for (Map.Entry<String, Charset> entry : CharsetUtil.getCharsetMap().entrySet()) { // test that the byte array wasn't changed Assert.assertEquals( "Charset " + entry.getKey() + " impl bad -- did not return null", "", CharsetUtil.decode(null, entry.getValue())); } }
@Test public void verifyDecodeDoesNotChangeByteArray() throws Exception { for (Map.Entry<String, Charset> entry : CharsetUtil.getCharsetMap().entrySet()) { byte[] bytes = new byte[] {(byte) 0x40, (byte) 0x5F, (byte) 0x24, (byte) 0x78, (byte) 0x02}; byte[] expectedBytes = Arrays.copyOf(bytes, bytes.length); String str0 = CharsetUtil.decode(bytes, entry.getValue()); // test that the byte array wasn't changed Assert.assertArrayEquals( "Charset " + entry.getKey() + " impl bad -- modified byte array parameter", expectedBytes, bytes); } }
@Test public void normalize() throws Exception { String in = null; // try every charset with simple A-Z, a-z, and 0-9 which should work // in all charsets in = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefjhijklmnopqrstuvwxyz01234567890?&@"; for (Map.Entry<String, Charset> entry : CharsetUtil.getCharsetMap().entrySet()) { Assert.assertEquals( "Charset " + entry.getKey() + " implementation broken", in, CharsetUtil.normalize(in, entry.getValue())); } in = "\u20AC"; // euro currency char (only supported in a couple charsets) Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_GSM)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_PACKED_GSM)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_AIRWIDE_GSM)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_VFD2_GSM)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_VFTR_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_ISO_8859_1)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_ISO_8859_15)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_UCS_2)); Assert.assertEquals("\u20AC", CharsetUtil.normalize(in, CharsetUtil.CHARSET_UTF_8)); in = "\u6025"; // arabic char (only supported in a couple charsets) Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_PACKED_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_AIRWIDE_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_VFD2_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_VFTR_GSM)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_ISO_8859_1)); Assert.assertEquals("?", CharsetUtil.normalize(in, CharsetUtil.CHARSET_ISO_8859_15)); Assert.assertEquals("\u6025", CharsetUtil.normalize(in, CharsetUtil.CHARSET_UCS_2)); Assert.assertEquals("\u6025", CharsetUtil.normalize(in, CharsetUtil.CHARSET_UTF_8)); }