Example #1
0
 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   String csn = cs.name();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("ENC-----------------");
     int pos = cbf.position();
     System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff);
     throw new RuntimeException("Encoding err: " + csn);
   }
   byte[] bb = new byte[bbf.position()];
   bbf.flip();
   bbf.get(bb);
   return bb;
 }
Example #2
0
  // check and compare canEncoding/Encoding
  static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception {
    System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
    CharsetEncoder encOLD = oldCS.newEncoder();
    CharsetEncoder encNew = newCS.newEncoder();
    char[] cc = new char[0x10000];
    int pos = 0;
    boolean is970 = "x-IBM970-Old".equals(oldCS.name());

    for (char c = 0; c < 0xffff; c++) {
      boolean canOld = encOLD.canEncode(c);
      boolean canNew = encNew.canEncode(c);

      if (is970 && c == 0x2299) continue;

      if (canOld != canNew) {
        if (canNew) {
          System.out.printf("      NEW(only): ");
          printEntry(c, newCS);
        } else {
          if (is970) {
            byte[] bb = new String(new char[] {c}).getBytes(oldCS);
            if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) {
              // we know 970 has bogus nnnn -> a2c1 -> 2299
              continue;
            }
          }
          System.out.printf("      OLD(only): ");
          printEntry(c, oldCS);
        }
      } else if (canNew) {
        byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
        byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
        if (!Arrays.equals(bbNew, bbOld)) {
          System.out.printf("      c->b NEW: ");
          printEntry(c, newCS);
          System.out.printf("      c->b OLD: ");
          printEntry(c, oldCS);
        } else {
          String sNew = new String(bbNew, newCS);
          String sOld = new String(bbOld, oldCS);
          if (!sNew.equals(sOld)) {
            System.out.printf("      b2c NEW (c=%x):", c & 0xffff);
            printEntry(sNew.charAt(0), newCS);
            System.out.printf("      b2c OLD:");
            printEntry(sOld.charAt(0), oldCS);
          }
        }
      }
      if (canNew & canOld) { // added only both for now
        cc[pos++] = c;
      }
    }
    return Arrays.copyOf(cc, pos);
  }
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    List<Integer> cps = new ArrayList<>(0x10000);
    int off = 0;
    int cp = 0;
    while (cp < 0x10000) {
      if (enc.canEncode((char) cp)) {
        cps.add(cp);
      }
      cp++;
    }
    Collections.shuffle(cps);
    char[] bmpCA = new char[cps.size()];
    for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i);
    String bmpStr = new String(bmpCA);
    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());
  }
Example #4
0
 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   return cr;
 }
  public static void main(String[] args) throws Throwable {
    final int itrs = Integer.getInteger("iterations", 100000);
    // final int itrs = Integer.getInteger("iterations", 12);
    final int size = Integer.getInteger("size", 2048);
    final int subsize = Integer.getInteger("subsize", 128);
    final int maxchar = Integer.getInteger("maxchar", 128);
    final String regex = System.getProperty("filter");
    final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
    final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
    if (useSecurityManager) System.setSecurityManager(new PermissiveSecurityManger());
    final Random rnd = new Random();

    String[] csns =
        new String[] {
          "Big5",
          "Johab",
          "EUC_CN",
          "EUC_KR",
          "MS932",
          "MS936",
          "MS949",
          "MS950",
          "GBK",
          "Big5_HKSCS",
          "Big5_HKSCS_2001",
          "Big5_Solaris",
          "MS950_HKSCS",
          "MS950_HKSCS_XP",
          "IBM1364",
          "IBM1381",
          "IBM1383",
          "IBM930",
          "IBM933",
          "IBM935",
          "IBM937",
          "IBM939",
          "IBM942",
          "IBM943",
          "IBM948",
          "IBM949",
          "IBM950",
          "IBM970",
        };

    ArrayList<long[]> sum = new ArrayList<>();

    for (final String csn : csns) {
      final Charset cs = Charset.forName(csn);
      List<Integer> cps = new ArrayList<>(0x4000);
      int off = 0;
      int cp = 0;
      int n = 0;
      CharsetEncoder enc = cs.newEncoder();
      while (cp < 0x10000 && n < cps.size()) {
        if (enc.canEncode((char) cp)) {
          cps.add(cp);
          n++;
        }
        cp++;
      }
      Collections.shuffle(cps);
      char[] ca = new char[cps.size()];
      for (int i = 0; i < cps.size(); i++) ca[i] = (char) (int) cps.get(i);

      System.out.printf("%n--------%s---------%n", csn);
      for (int sz = 8; sz <= 2048; sz *= 2) {
        System.out.printf("   [len=%d]%n", sz);

        final char[] chars = Arrays.copyOf(ca, sz);
        final String str = new String(chars);
        final byte[] bs = str.getBytes(cs);

        Job[] jobs = {
          new Job("String decode: csn") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) new String(bs, csn);
            }
          },
          new Job("String decode: cs") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) new String(bs, cs);
            }
          },
          new Job("String encode: csn") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) str.getBytes(csn);
            }
          },
          new Job("String encode: cs") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) str.getBytes(cs);
            }
          },
        };
        sum.add(time(jobs));
      }
    }
  }
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    String bmpStr = new String(bmpCA);
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // getBytes(csn);
    byte[] baSC = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

    // getBytes(cs);
    baSC = bmpStr.getBytes(cs);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(sbBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();

    if (!strNIO.equals(strSC))
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());

    // new String(cs);
    strSC = new String(sbBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());

    // encode unmappable surrogates
    if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) {
      if (cs.name().equals("UTF-8")
          || // utf8 handles surrogates
          cs.name().equals("CESU-8")) // utf8 handles surrogates
      return;
      enc.replaceWith(new byte[] {(byte) 'A'});
      sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc;

      String str = "ab\uD800\uDC00\uD800\uDC00cd";
      byte[] ba = new byte[str.length() - 2];
      int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
        throw new RuntimeException("encode1(surrogates) failed  -> " + cs.name());

      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode2(surrogates) failed  -> " + cs.name());
      str = "ab\uD800B\uDC00Bcd";
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode3(surrogates) failed  -> " + cs.name());
      /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
         invoker (StringCoder) allocates enough output buf, utf8
         and double-byte coder does not check the output buffer limit.
      ba = new byte[str.length() - 1];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
          throw new RuntimeException("encode4(surrogates) failed  -> "
                                     + cs.name());
      }
      */
    }
  }