static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception { System.gc(); // enqueue finalizable objects Thread.sleep(1000); System.gc(); // enqueue finalizable objects String csn1 = cs1.name(); String csn2 = cs2.name(); System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); Time t1 = new Time(); Time t2 = new Time(); byte[] bb1 = encode(cc, cs1, false, t1); byte[] bb2 = encode(cc, cs2, false, t2); System.out.printf( " Encoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(bb1, bb2)) { System.out.printf(" encoding failed%n"); } char[] cc2 = decode(bb1, cs2, false, t2); char[] cc1 = decode(bb1, cs1, false, t1); System.out.printf( " Decoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding failed%n"); } bb1 = encode(cc, cs1, true, t1); bb2 = encode(cc, cs2, true, t2); System.out.printf( " Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(bb1, bb2)) System.out.printf(" encoding (direct) failed%n"); cc1 = decode(bb1, cs1, true, t1); cc2 = decode(bb1, cs2, true, t2); System.out.printf( " Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding (direct) failed%n"); } }
static void testMixed(Charset cs) throws Throwable { CharsetDecoder dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); CharsetEncoder enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); List<Integer> cps = new ArrayList<>(0x10000); int off = 0; int cp = 0; while (cp < 0x10000) { if (enc.canEncode((char) cp)) { cps.add(cp); } cp++; } Collections.shuffle(cps); char[] bmpCA = new char[cps.size()]; for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i); String bmpStr = new String(bmpCA); // getBytes(csn); byte[] bmpBA = bmpStr.getBytes(cs.name()); ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA)); byte[] baNIO = new byte[bf.limit()]; bf.get(baNIO, 0, baNIO.length); if (!Arrays.equals(bmpBA, baNIO)) { throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); } // getBytes(cs); bmpBA = bmpStr.getBytes(cs); if (!Arrays.equals(bmpBA, baNIO)) throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); // new String(csn); String strSC = new String(bmpBA, cs.name()); String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString(); if (!strNIO.equals(strSC)) { throw new RuntimeException("new String(csn) failed -> " + cs.name()); } // new String(cs); strSC = new String(bmpBA, cs); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed -> " + cs.name()); }
// check and compare canEncoding/Encoding static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception { System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name()); CharsetEncoder encOLD = oldCS.newEncoder(); CharsetEncoder encNew = newCS.newEncoder(); char[] cc = new char[0x10000]; int pos = 0; boolean is970 = "x-IBM970-Old".equals(oldCS.name()); for (char c = 0; c < 0xffff; c++) { boolean canOld = encOLD.canEncode(c); boolean canNew = encNew.canEncode(c); if (is970 && c == 0x2299) continue; if (canOld != canNew) { if (canNew) { System.out.printf(" NEW(only): "); printEntry(c, newCS); } else { if (is970) { byte[] bb = new String(new char[] {c}).getBytes(oldCS); if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) { // we know 970 has bogus nnnn -> a2c1 -> 2299 continue; } } System.out.printf(" OLD(only): "); printEntry(c, oldCS); } } else if (canNew) { byte[] bbNew = new String(new char[] {c}).getBytes(newCS); byte[] bbOld = new String(new char[] {c}).getBytes(oldCS); if (!Arrays.equals(bbNew, bbOld)) { System.out.printf(" c->b NEW: "); printEntry(c, newCS); System.out.printf(" c->b OLD: "); printEntry(c, oldCS); } else { String sNew = new String(bbNew, newCS); String sOld = new String(bbOld, oldCS); if (!sNew.equals(sOld)) { System.out.printf(" b2c NEW (c=%x):", c & 0xffff); printEntry(sNew.charAt(0), newCS); System.out.printf(" b2c OLD:"); printEntry(sOld.charAt(0), oldCS); } } } if (canNew & canOld) { // added only both for now cc[pos++] = c; } } return Arrays.copyOf(cc, pos); }
static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable { String bmpStr = new String(bmpCA); CharsetDecoder dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); CharsetEncoder enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); // getBytes(csn); byte[] baSC = bmpStr.getBytes(cs.name()); ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA)); byte[] baNIO = new byte[bf.limit()]; bf.get(baNIO, 0, baNIO.length); if (!Arrays.equals(baSC, baNIO)) throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); // getBytes(cs); baSC = bmpStr.getBytes(cs); if (!Arrays.equals(baSC, baNIO)) throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); // new String(csn); String strSC = new String(sbBA, cs.name()); String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString(); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(csn) failed -> " + cs.name()); // new String(cs); strSC = new String(sbBA, cs); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed -> " + cs.name()); // encode unmappable surrogates if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) { if (cs.name().equals("UTF-8") || // utf8 handles surrogates cs.name().equals("CESU-8")) // utf8 handles surrogates return; enc.replaceWith(new byte[] {(byte) 'A'}); sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc; String str = "ab\uD800\uDC00\uD800\uDC00cd"; byte[] ba = new byte[str.length() - 2]; int n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 6 || !"abAAcd".equals(new String(ba, cs.name()))) throw new RuntimeException("encode1(surrogates) failed -> " + cs.name()); ba = new byte[str.length()]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name()))) throw new RuntimeException("encode2(surrogates) failed -> " + cs.name()); str = "ab\uD800B\uDC00Bcd"; ba = new byte[str.length()]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name()))) throw new RuntimeException("encode3(surrogates) failed -> " + cs.name()); /* sun.nio.cs.ArrayDeEncoder works on the assumption that the invoker (StringCoder) allocates enough output buf, utf8 and double-byte coder does not check the output buffer limit. ba = new byte[str.length() - 1]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) { throw new RuntimeException("encode4(surrogates) failed -> " + cs.name()); } */ } }