Beispiel #1
0
 /**
  * Performs a single non-final decode step over a windowed input/output buffer pair and compares
  * the outcome with the expectation encoded in {@code flow}.
  *
  * <p>Layout of {@code flow}: {@code [0]} input start position, {@code [1]} number of input bytes
  * made visible to the decoder, {@code [2]} output start position, {@code [3]} output capacity
  * past the start position, {@code [4]} expected input position after decoding, {@code [5]}
  * expected output position after decoding, {@code [6]} expected result ({@code 0} means
  * {@link CoderResult#UNDERFLOW}, anything else {@link CoderResult#OVERFLOW}).
  *
  * @param dec decoder under test; it is reset before use
  * @param bytes input bytes, copied into the input buffer starting at {@code flow[0]}
  * @param direct {@code true} to use direct buffers, {@code false} for heap buffers
  * @param flow window description and expected outcome, see above
  * @return {@code true} if result and both positions match; otherwise a diagnostic line is
  *     printed to standard output and {@code false} is returned
  */
 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
   final int srcStart = flow[0];
   final int srcCount = flow[1];
   final int dstStart = flow[2];
   final int dstCount = flow[3];
   final int wantSrcPos = flow[4];
   final int wantDstPos = flow[5];
   final CoderResult wantResult;
   if (flow[6] == 0) {
     wantResult = CoderResult.UNDERFLOW;
   } else {
     wantResult = CoderResult.OVERFLOW;
   }

   final ByteBuffer src =
       direct
           ? ByteBuffer.allocateDirect(srcStart + bytes.length)
           : ByteBuffer.allocate(srcStart + bytes.length);
   final CharBuffer dst =
       direct
           ? ByteBuffer.allocateDirect((dstStart + dstCount) * 2).asCharBuffer()
           : CharBuffer.allocate(dstStart + dstCount);

   // Place the payload at srcStart, then narrow the readable window to srcCount bytes.
   src.position(srcStart);
   src.put(bytes);
   src.flip();
   src.position(srcStart);
   src.limit(srcStart + srcCount);
   dst.position(dstStart);

   dec.reset();
   final CoderResult got = dec.decode(src, dst, false);

   final boolean asExpected =
       got == wantResult && src.position() == wantSrcPos && dst.position() == wantDstPos;
   if (asExpected) {
     return true;
   }

   System.out.printf("Expected(direct=%5b): [", direct);
   for (int idx = 0; idx < flow.length; idx++) {
     System.out.print(" " + flow[idx]);
   }
   System.out.println(
       "]  CR=" + got + ", inPos=" + src.position() + ", outPos=" + dst.position());
   return false;
 }
  /**
   * Cross-checks {@code String} byte/char conversion against the NIO coders of {@code cs}, using
   * a randomly shuffled string made of every {@code char} in {@code [0, 0x10000)} that the
   * charset's encoder reports as encodable.
   *
   * <p>Four paths are compared with the NIO encoder/decoder output (both configured to REPLACE on
   * malformed or unmappable input): {@code getBytes(String)}, {@code getBytes(Charset)}, {@code
   * new String(byte[], String)} and {@code new String(byte[], Charset)}.
   *
   * @param cs charset under test
   * @throws RuntimeException if any String-based result differs from the NIO coder result
   * @throws Throwable any other failure raised by the coders
   */
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // Collect every encodable char value, then shuffle so the input is not in code point order.
    List<Integer> cps = new ArrayList<>(0x10000);
    for (int cp = 0; cp < 0x10000; cp++) {
      if (enc.canEncode((char) cp)) {
        cps.add(cp);
      }
    }
    Collections.shuffle(cps);

    char[] bmpCA = new char[cps.size()];
    for (int i = 0; i < bmpCA.length; i++) {
      bmpCA[i] = (char) cps.get(i).intValue();
    }
    String bmpStr = new String(bmpCA);

    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.remaining()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());
    }

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(cs) failed  -> " + cs.name());
    }
  }
Beispiel #3
0
 /**
  * Decodes {@code bb} with a fresh decoder of {@code cs}, repeating the operation {@code
  * iteration} times and recording the average time per pass.
  *
  * @param bb bytes to decode
  * @param cs charset whose decoder is exercised
  * @param testDirect {@code true} to decode through direct buffers, {@code false} for heap
  *     buffers
  * @param t receives the average elapsed time per iteration in microseconds in {@code t.t}
  * @return the characters produced by the last decoding pass
  * @throws RuntimeException if the last pass did not end with {@link CoderResult#UNDERFLOW}
  * @throws Exception declared for callers; not thrown directly by the visible code
  */
 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception {
   String csn = cs.name();
   CharsetDecoder dec = cs.newDecoder();
   ByteBuffer bbf;
   CharBuffer cbf;
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(bb.length);
     cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     // Flip after filling so the buffer is readable even before the first rewind().
     bbf.put(bb).flip();
   } else {
     bbf = ByteBuffer.wrap(bb);
     cbf = CharBuffer.allocate(bb.length);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     bbf.rewind();
     cbf.clear();
     dec.reset();
     cr = dec.decode(bbf, cbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("DEC-----------------");
     int pos = bbf.position();
     // Dump at most four bytes starting at the failure position, but never read past the end
     // of the input: a coding error in the last bytes must not turn into an
     // ArrayIndexOutOfBoundsException that hides the real failure.
     StringBuilder dump = new StringBuilder();
     int end = Math.min(pos + 4, bb.length);
     for (int i = pos; i < end; i++) {
       if (i > pos) {
         dump.append(',');
       }
       dump.append(Integer.toHexString(bb[i] & 0xff));
     }
     // %s tolerates a null result (loop never ran), unlike an explicit cr.toString().
     System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%s%n", cr, pos, dump);
     throw new RuntimeException("Decoding err: " + csn);
   }
   char[] cc = new char[cbf.position()];
   cbf.flip();
   cbf.get(cc);
   return cc;
 }
Beispiel #4
0
 /**
  * Decodes {@code bb} with a fresh decoder of {@code cs}, {@code iteration} times over, and
  * reports how the last pass ended.
  *
  * @param bb bytes to decode
  * @param cs charset whose decoder is exercised
  * @param testDirect {@code true} to decode through direct buffers, {@code false} for heap
  *     buffers
  * @return the result of the final decoding pass, or {@code null} if no pass was executed
  * @throws Exception declared for callers; not thrown directly by the visible code
  */
 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) throws Exception {
   final CharsetDecoder decoder = cs.newDecoder();
   final ByteBuffer input;
   final CharBuffer output;
   if (!testDirect) {
     input = ByteBuffer.wrap(bb);
     output = CharBuffer.allocate(bb.length);
   } else {
     input = ByteBuffer.allocateDirect(bb.length);
     output = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     input.put(bb);
     input.flip();
   }

   CoderResult lastResult = null;
   int pass = 0;
   while (pass < iteration) {
     // Start every pass from a clean slate: full input, empty output, fresh decoder state.
     input.rewind();
     output.clear();
     decoder.reset();
     lastResult = decoder.decode(input, output, true);
     pass++;
   }
   return lastResult;
 }
  /**
   * Guesses the charset of a file by trial-decoding its leading bytes.
   *
   * <p>At most the first 4096 bytes of the file are read. Candidate charsets are tried in this
   * order: the platform default, US-ASCII, UTF-8, UTF-16BE, UTF-16LE, then every other available
   * charset. The first candidate whose decoder accepts the sample without a {@link
   * CharacterCodingException} is returned.
   *
   * @param f File to process.
   * @return First candidate charset that decodes the sample, or the platform default charset if
   *     none does.
   * @throws IOException in case of error.
   */
  public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {
      Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    // Insertion-ordered set: preferred charsets first, duplicates from addAll() are ignored.
    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets) {
      if (charsets.containsKey(c)) {
        orderedCharsets.add(charsets.get(c));
      }
    }

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
      FileChannel ch = raf.getChannel();

      ByteBuffer buf = ByteBuffer.allocate(4096);

      ch.read(buf);

      buf.flip();

      // NOTE(review): decode(ByteBuffer) treats the sample as complete input, so a multi-byte
      // sequence cut off at the 4096-byte boundary is reported as malformed - confirm whether
      // that is acceptable for large files.
      for (Charset charset : orderedCharsets) {
        CharsetDecoder decoder = charset.newDecoder();

        try {
          // Decode a duplicate: decode() advances the position of the buffer it is given, so
          // sharing one buffer would make every candidate after the first failure see only the
          // bytes following the failure point instead of the whole sample.
          decoder.decode(buf.duplicate());

          return charset;
        } catch (CharacterCodingException ignored) {
          // This charset cannot represent the sample; try the next candidate.
        }
      }
    }

    return Charset.defaultCharset();
  }
  /**
   * Compares {@code String}-based conversion with the NIO coders of {@code cs} and, for
   * array-encoder-backed charsets that contain ASCII, checks how surrogates are replaced.
   *
   * <p>The encoder and decoder are both configured to REPLACE on malformed or unmappable input.
   * {@code bmpCA} is encoded via {@code getBytes(String)} and {@code getBytes(Charset)} and
   * compared with the NIO encoder output; {@code sbBA} is decoded via both {@code String}
   * constructors and compared with the NIO decoder output.
   *
   * @param cs charset under test
   * @param bmpCA characters used for the encoding comparison
   * @param sbBA bytes used for the decoding comparison
   * @throws RuntimeException if any comparison fails
   * @throws Throwable any other failure raised by the coders
   */
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    String bmpStr = new String(bmpCA);
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // getBytes(csn);
    byte[] baSC = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

    // getBytes(cs);
    baSC = bmpStr.getBytes(cs);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(sbBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();

    if (!strNIO.equals(strSC))
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());

    // new String(cs);
    strSC = new String(sbBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());

    // encode unmappable surrogates
    // Only applies when the encoder implements the JDK-internal ArrayEncoder interface and the
    // charset contains ASCII, so the replacement byte 'A' and the literal letters are comparable.
    if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) {
      if (cs.name().equals("UTF-8")
          || // utf8 handles surrogates
          cs.name().equals("CESU-8")) // utf8 handles surrogates
      return;
      enc.replaceWith(new byte[] {(byte) 'A'});
      sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc;

      // Two valid surrogate pairs: each pair is expected to become a single replacement byte,
      // so the 8-char input yields 6 bytes. First run uses an exactly-sized output array.
      String str = "ab\uD800\uDC00\uD800\uDC00cd";
      byte[] ba = new byte[str.length() - 2];
      int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
        throw new RuntimeException("encode1(surrogates) failed  -> " + cs.name());

      // Same input with a larger output array; only the first n bytes are checked.
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode2(surrogates) failed  -> " + cs.name());
      // Unpaired surrogates (a high and a low, each followed by 'B'): each lone surrogate is
      // expected to be replaced individually, giving 8 bytes for 8 chars.
      str = "ab\uD800B\uDC00Bcd";
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode3(surrogates) failed  -> " + cs.name());
      /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
         invoker (StringCoder) allocates enough output buf, utf8
         and double-byte coder does not check the output buffer limit.
      ba = new byte[str.length() - 1];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
          throw new RuntimeException("encode4(surrogates) failed  -> "
                                     + cs.name());
      }
      */
    }
  }