Example #1
0
  /**
   * Pushes the same input through two charsets and prints, for each of four phases, the elapsed
   * times recorded in the two {@code Time} objects together with their ratio.
   *
   * <p>The phases are encode, decode, then encode and decode again with the boolean flag passed
   * to {@code encode}/{@code decode} switched from {@code false} to {@code true} (the printed
   * labels call this "dir"/"direct" - presumably direct buffers; confirm against those helpers).
   * Every decode phase consumes the bytes that {@code cs1} produced in the preceding encode phase.
   * A mismatch between the two charsets' results is only reported on {@code System.out}; it never
   * throws.
   *
   * @param cs1 charset whose timing is the denominator of the printed ratio
   * @param cs2 charset whose timing is the numerator of the printed ratio
   * @param cc chars to encode
   * @throws Exception whatever {@code encode}, {@code decode} or {@code Thread.sleep} throw
   */
  static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
    // Collect, pause, collect again before any measurement starts.
    System.gc(); // enqueue finalizable objects
    Thread.sleep(1000);
    System.gc(); // enqueue finalizable objects

    final String name1 = cs1.name();
    final String name2 = cs2.name();
    System.out.printf("Diff     <%s> <%s>...%n", name1, name2);

    // The same two Time objects are handed to every phase below.
    final Time timer1 = new Time();
    final Time timer2 = new Time();

    // Phase 1: encode, flag = false.
    byte[] bytes1 = encode(cc, cs1, false, timer1);
    byte[] bytes2 = encode(cc, cs2, false, timer2);
    printTimeRatio("    Encoding TimeRatio %s/%s: %d,%d :%f%n", name2, name1, timer2, timer1);
    if (!Arrays.equals(bytes1, bytes2)) {
      System.out.printf("        encoding failed%n");
    }

    // Phase 2: decode, flag = false. cs2 runs before cs1 here; both read cs1's bytes.
    char[] chars2 = decode(bytes1, cs2, false, timer2);
    char[] chars1 = decode(bytes1, cs1, false, timer1);
    printTimeRatio("    Decoding TimeRatio %s/%s: %d,%d :%f%n", name2, name1, timer2, timer1);
    if (!Arrays.equals(chars1, chars2)) {
      System.out.printf("        decoding failed%n");
    }

    // Phase 3: encode, flag = true.
    bytes1 = encode(cc, cs1, true, timer1);
    bytes2 = encode(cc, cs2, true, timer2);
    printTimeRatio("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", name2, name1, timer2, timer1);
    if (!Arrays.equals(bytes1, bytes2)) {
      System.out.printf("        encoding (direct) failed%n");
    }

    // Phase 4: decode, flag = true.
    chars1 = decode(bytes1, cs1, true, timer1);
    chars2 = decode(bytes1, cs2, true, timer2);
    printTimeRatio("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", name2, name1, timer2, timer1);
    if (!Arrays.equals(chars1, chars2)) {
      System.out.printf("        decoding (direct) failed%n");
    }
  }

  /** Prints one timing line: both charset names, both recorded times, and t2/t1 as a double. */
  private static void printTimeRatio(String format, String csn2, String csn1, Time t2, Time t1) {
    System.out.printf(format, csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t));
  }
  /**
   * Cross-checks {@code String}'s charset conversions against the NIO coders of {@code cs},
   * using a shuffled array of every BMP char the charset's encoder reports as encodable.
   *
   * <p>Four conversions are verified: {@code getBytes(String)} and {@code getBytes(Charset)}
   * against {@code CharsetEncoder.encode}, then {@code new String(byte[], String)} and
   * {@code new String(byte[], Charset)} against {@code CharsetDecoder.decode}. Both coders are
   * configured to REPLACE malformed and unmappable input.
   *
   * @param cs charset under test
   * @throws RuntimeException if any of the four conversions disagrees with the NIO result
   * @throws Throwable anything raised by the underlying coders
   */
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // Gather every encodable BMP char. The counter is an int because a char counter
    // would wrap to 0 after 0xFFFF and never reach the bound.
    List<Character> encodable = new ArrayList<>(0x10000);
    for (int cp = 0; cp < 0x10000; cp++) {
      char c = (char) cp;
      if (enc.canEncode(c)) {
        encodable.add(c);
      }
    }
    // Randomise the order so the input is not a monotonically increasing sequence.
    Collections.shuffle(encodable);
    char[] bmpCA = new char[encodable.size()];
    for (int i = 0; i < bmpCA.length; i++) {
      bmpCA[i] = encodable.get(i);
    }
    String bmpStr = new String(bmpCA);

    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.remaining()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());
    }

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(cs) failed  -> " + cs.name());
    }
  }
Example #3
0
  /**
   * Checks and compares {@code canEncode} and the encoded form of every BMP char
   * (U+0000 through U+FFFF inclusive) between two charsets.
   *
   * <p>Differences are printed to {@code System.out} via {@code printEntry}; they do not stop
   * the scan. Three kinds of difference are reported: a char only one charset can encode, a char
   * both encode to different bytes, and a char whose identical bytes decode to different strings.
   *
   * <p>When {@code oldCS} is named {@code x-IBM970-Old}, U+2299 is skipped entirely and chars
   * that only the old charset encodes as the bytes {@code a2 c1} are not reported (known bogus
   * nnnn -> a2c1 -> 2299 mapping).
   *
   * @param oldCS reference charset
   * @param newCS charset compared against {@code oldCS}
   * @return the chars both charsets report as encodable, in ascending order
   * @throws Exception anything raised while encoding, decoding or printing an entry
   */
  static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception {
    System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
    CharsetEncoder encOLD = oldCS.newEncoder();
    CharsetEncoder encNew = newCS.newEncoder();
    char[] cc = new char[0x10000];
    int pos = 0;
    boolean is970 = "x-IBM970-Old".equals(oldCS.name());

    // An int counter with an inclusive bound, so U+FFFF is examined too; a char counter
    // cannot express "<= 0xffff" because it wraps back to 0 instead of terminating.
    for (int cp = 0; cp <= 0xffff; cp++) {
      char c = (char) cp;
      if (is970 && c == 0x2299) {
        continue;
      }

      boolean canOld = encOLD.canEncode(c);
      boolean canNew = encNew.canEncode(c);
      String s = String.valueOf(c);

      if (canOld != canNew) {
        if (canNew) {
          System.out.printf("      NEW(only): ");
          printEntry(c, newCS);
        } else {
          if (is970) {
            byte[] bb = s.getBytes(oldCS);
            if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) {
              // we know 970 has bogus nnnn -> a2c1 -> 2299
              continue;
            }
          }
          System.out.printf("      OLD(only): ");
          printEntry(c, oldCS);
        }
        continue; // encodable by only one side: never part of the result
      }
      if (!canNew) {
        continue; // neither charset can encode it
      }

      // Both charsets can encode c: compare the bytes, then the round trip.
      byte[] bbNew = s.getBytes(newCS);
      byte[] bbOld = s.getBytes(oldCS);
      if (!Arrays.equals(bbNew, bbOld)) {
        System.out.printf("      c->b NEW: ");
        printEntry(c, newCS);
        System.out.printf("      c->b OLD: ");
        printEntry(c, oldCS);
      } else {
        String sNew = new String(bbNew, newCS);
        String sOld = new String(bbOld, oldCS);
        if (!sNew.equals(sOld)) {
          System.out.printf("      b2c NEW (c=%x):", c & 0xffff);
          printEntry(sNew.charAt(0), newCS);
          System.out.printf("      b2c OLD:");
          printEntry(sOld.charAt(0), oldCS);
        }
      }
      cc[pos++] = c; // added only both for now
    }
    return Arrays.copyOf(cc, pos);
  }
  /**
   * Verifies that {@code String}'s charset conversions agree with the NIO coders of {@code cs},
   * then, for array encoders over ASCII-containing charsets, checks how unmappable surrogates
   * are replaced.
   *
   * <p>Encoding is checked with {@code bmpCA}; decoding is checked with {@code sbBA}. Both coders
   * are configured to REPLACE malformed and unmappable input. The surrogate checks run only when
   * the encoder is a {@code sun.nio.cs.ArrayEncoder}, the charset contains ASCII, and the charset
   * is neither UTF-8 nor CESU-8.
   *
   * @param cs charset under test
   * @param bmpCA chars fed to the encoding checks
   * @param sbBA bytes fed to the decoding checks
   * @throws RuntimeException if any conversion disagrees with the expected result
   * @throws Throwable anything raised by the underlying coders
   */
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    final String text = new String(bmpCA);
    final CharsetDecoder decoder =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    final CharsetEncoder encoder =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    final String csName = cs.name();

    // getBytes(csn);
    byte[] viaString = text.getBytes(csName);
    ByteBuffer encoded = encoder.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] viaNio = new byte[encoded.limit()];
    encoded.get(viaNio, 0, viaNio.length);
    if (!Arrays.equals(viaString, viaNio)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + csName);
    }

    // getBytes(cs);
    viaString = text.getBytes(cs);
    if (!Arrays.equals(viaString, viaNio)) {
      throw new RuntimeException("getBytes(cs) failed  -> " + csName);
    }

    // new String(csn);
    String decodedByString = new String(sbBA, csName);
    String decodedByNio = decoder.reset().decode(ByteBuffer.wrap(sbBA)).toString();
    if (!decodedByNio.equals(decodedByString)) {
      throw new RuntimeException("new String(csn) failed  -> " + csName);
    }

    // new String(cs);
    decodedByString = new String(sbBA, cs);
    if (!decodedByNio.equals(decodedByString)) {
      throw new RuntimeException("new String(cs) failed  -> " + csName);
    }

    // encode unmappable surrogates
    boolean arrayEncoderOverAscii =
        encoder instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"));
    if (!arrayEncoderOverAscii) {
      return;
    }
    if (csName.equals("UTF-8") || csName.equals("CESU-8")) {
      return; // utf8 handles surrogates
    }

    encoder.replaceWith(new byte[] {(byte) 'A'});
    sun.nio.cs.ArrayEncoder arrayEncoder = (sun.nio.cs.ArrayEncoder) encoder;

    // Two well-formed surrogate pairs; output buffer two bytes shorter than the input.
    char[] input = "ab\uD800\uDC00\uD800\uDC00cd".toCharArray();
    byte[] out = new byte[input.length - 2];
    int written = arrayEncoder.encode(input, 0, input.length, out);
    if (!(written == 6 && "abAAcd".equals(new String(out, csName)))) {
      throw new RuntimeException("encode1(surrogates) failed  -> " + csName);
    }

    // Same input; output buffer as long as the input.
    out = new byte[input.length];
    written = arrayEncoder.encode(input, 0, input.length, out);
    if (!(written == 6 && "abAAcd".equals(new String(out, 0, written, csName)))) {
      throw new RuntimeException("encode2(surrogates) failed  -> " + csName);
    }

    // Unpaired surrogates, each followed by an ordinary char.
    input = "ab\uD800B\uDC00Bcd".toCharArray();
    out = new byte[input.length];
    written = arrayEncoder.encode(input, 0, input.length, out);
    if (!(written == 8 && "abABABcd".equals(new String(out, 0, written, csName)))) {
      throw new RuntimeException("encode3(surrogates) failed  -> " + csName);
    }

    /* Disabled: sun.nio.cs.ArrayDeEncoder works on the assumption that the
       invoker (StringCoder) allocates enough output buf, utf8
       and double-byte coder does not check the output buffer limit.
    ba = new byte[str.length() - 1];
    n = cae.encode(str.toCharArray(), 0, str.length(), ba);
    if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
        throw new RuntimeException("encode4(surrogates) failed  -> "
                                   + cs.name());
    }
    */
  }