Esempio n. 1
0
  public static void main(String args[]) throws Exception {
    String inputFile = "samplein.txt";
    String outputFile = "sampleout.txt";

    RandomAccessFile inf = new RandomAccessFile(inputFile, "r");
    RandomAccessFile outf = new RandomAccessFile(outputFile, "rw");
    long inputLength = new File(inputFile).length();

    FileChannel inc = inf.getChannel();
    FileChannel outc = outf.getChannel();

    MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength);

    Charset latin1 = Charset.forName("ISO-8859-1");
    CharsetDecoder decoder = latin1.newDecoder();
    CharsetEncoder encoder = latin1.newEncoder();

    CharBuffer cb = decoder.decode(inputData);

    // Process char data here

    ByteBuffer outputData = encoder.encode(cb);

    outc.write(outputData);

    inf.close();
    outf.close();
  }
Esempio n. 2
0
 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
   int inPos = flow[0];
   int inLen = flow[1];
   int outPos = flow[2];
   int outLen = flow[3];
   int expedInPos = flow[4];
   int expedOutPos = flow[5];
   CoderResult expedCR = (flow[6] == 0) ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
   ByteBuffer bbf;
   CharBuffer cbf;
   if (direct) {
     bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
     cbf = ByteBuffer.allocateDirect((outPos + outLen) * 2).asCharBuffer();
   } else {
     bbf = ByteBuffer.allocate(inPos + bytes.length);
     cbf = CharBuffer.allocate(outPos + outLen);
   }
   bbf.position(inPos);
   bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
   cbf.position(outPos);
   dec.reset();
   CoderResult cr = dec.decode(bbf, cbf, false);
   if (cr != expedCR || bbf.position() != expedInPos || cbf.position() != expedOutPos) {
     System.out.printf("Expected(direct=%5b): [", direct);
     for (int i : flow) System.out.print(" " + i);
     System.out.println(
         "]  CR=" + cr + ", inPos=" + bbf.position() + ", outPos=" + cbf.position());
     return false;
   }
   return true;
 }
  /**
   * Read a 'n' bytes from buffer into a String where n is the framesize - offset so therefore
   * cannot use this if there are other objects after it because it has no delimiter.
   *
   * <p>Must take into account the text encoding defined in the Encoding Object ID3 Text Frames
   * often allow multiple strings seperated by the null char appropriate for the encoding.
   *
   * @param arr this is the buffer for the frame
   * @param offset this is where to start reading in the buffer for this field
   * @throws NullPointerException
   * @throws IndexOutOfBoundsException
   */
  public void readByteArray(byte[] arr, int offset) throws InvalidDataTypeException {
    logger.finest("Reading from array from offset:" + offset);

    // Get the Specified Decoder
    String charSetName = getTextEncodingCharSet();
    CharsetDecoder decoder = Charset.forName(charSetName).newDecoder();
    decoder.reset();

    // Decode sliced inBuffer
    ByteBuffer inBuffer;
    // #302 [dallen] truncating array manually since the decoder.decode() does not honor the offset
    // in the in buffer
    byte[] truncArr = new byte[arr.length - offset];
    System.arraycopy(arr, offset, truncArr, 0, truncArr.length);
    inBuffer = ByteBuffer.wrap(truncArr);

    CharBuffer outBuffer = CharBuffer.allocate(arr.length - offset);
    CoderResult coderResult = decoder.decode(inBuffer, outBuffer, true);
    if (coderResult.isError()) {
      logger.warning("Decoding error:" + coderResult.toString());
    }
    decoder.flush(outBuffer);
    outBuffer.flip();

    // If using UTF16 with BOM we then search through the text removing any BOMs that could exist
    // for multiple values, BOM could be Big Endian or Little Endian
    if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) {
      value = outBuffer.toString().replace("\ufeff", "").replace("\ufffe", "");
    } else {
      value = outBuffer.toString();
    }
    // SetSize, important this is correct for finding the next datatype
    setSize(arr.length - offset);
    logger.config("Read SizeTerminatedString:" + value + " size:" + size);
  }
Esempio n. 4
0
 public static void main(String[] arguments) {
   try {
     // read byte data into a byte buffer
     String data = "friends.dat";
     FileInputStream inData = new FileInputStream(data);
     FileChannel inChannel = inData.getChannel();
     long inSize = inChannel.size();
     ByteBuffer source = ByteBuffer.allocate((int) inSize);
     inChannel.read(source, 0);
     source.position(0);
     System.out.println("Original byte data:");
     for (int i = 0; source.remaining() > 0; i++) {
       System.out.print(source.get() + " ");
     }
     // convert byte data into character data
     source.position(0);
     Charset ascii = Charset.forName("US-ASCII");
     CharsetDecoder toAscii = ascii.newDecoder();
     CharBuffer destination = toAscii.decode(source);
     destination.position(0);
     System.out.println("\n\nNew character data:");
     for (int i = 0; destination.remaining() > 0; i++) {
       System.out.print(destination.get());
     }
     System.out.println();
   } catch (FileNotFoundException fne) {
     System.out.println(fne.getMessage());
   } catch (IOException ioe) {
     System.out.println(ioe.getMessage());
   }
 }
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    List<Integer> cps = new ArrayList<>(0x10000);
    int off = 0;
    int cp = 0;
    while (cp < 0x10000) {
      if (enc.canEncode((char) cp)) {
        cps.add(cp);
      }
      cp++;
    }
    Collections.shuffle(cps);
    char[] bmpCA = new char[cps.size()];
    for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i);
    String bmpStr = new String(bmpCA);
    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());
  }
 /**
  * Sets the substitution characters to use when the converter is in substitution mode. The given
  * chars must not be longer than the value returned by getMaxCharsPerByte for this converter.
  *
  * @param chars the substitution chars
  * @exception IllegalArgumentException if given byte array is longer than the value returned by
  *     the method getMaxBytesPerChar.
  * @see #setSubstitutionMode
  * @see #getMaxBytesPerChar
  */
 public void setSubstitutionChars(char[] chars) throws IllegalArgumentException {
   if (decoder != null) decoder.replaceWith(new String(chars));
   else { // only provided for subclasses
     if (chars.length > getMaxCharsPerByte()) throw new IllegalArgumentException();
     subChars = new char[chars.length];
     System.arraycopy(chars, 0, subChars, 0, chars.length);
   }
 }
Esempio n. 7
0
 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception {
   String csn = cs.name();
   CharsetDecoder dec = cs.newDecoder();
   ByteBuffer bbf;
   CharBuffer cbf;
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(bb.length);
     cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     bbf.put(bb);
   } else {
     bbf = ByteBuffer.wrap(bb);
     cbf = CharBuffer.allocate(bb.length);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     bbf.rewind();
     cbf.clear();
     dec.reset();
     cr = dec.decode(bbf, cbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("DEC-----------------");
     int pos = bbf.position();
     System.out.printf(
         "  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
         cr.toString(),
         pos,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff);
     throw new RuntimeException("Decoding err: " + csn);
   }
   char[] cc = new char[cbf.position()];
   cbf.flip();
   cbf.get(cc);
   return cc;
 }
Esempio n. 8
0
 public String readAsString(Charset charset) throws CharacterCodingException {
   int unreadSize = totalBytesUnread();
   if (unreadSize > 0) {
     CharsetDecoder decoder =
         charset
             .newDecoder()
             .onMalformedInput(CodingErrorAction.REPLACE)
             .onUnmappableCharacter(CodingErrorAction.REPLACE);
     CharBuffer charbuffer = CharBuffer.allocate(unreadSize);
     ByteBuffer buf = null;
     while (prepareRead() != -1) {
       buf = currentReadChunk.readToNioBuffer();
       boolean endOfInput = (prepareRead() == -1);
       CoderResult result = decoder.decode(buf, charbuffer, endOfInput);
       if (endOfInput) {
         if (!result.isUnderflow()) {
           result.throwException();
         }
       }
     }
     CoderResult result = decoder.flush(charbuffer);
     if (buf.hasRemaining()) {
       throw new IllegalStateException("There's a bug here, buffer wasn't read fully.");
     }
     if (!result.isUnderflow()) result.throwException();
     charbuffer.flip();
     String str;
     if (charbuffer.hasArray()) {
       int len = charbuffer.remaining();
       char[] ch = charbuffer.array();
       if (len != ch.length) {
         ch = ArrayUtils.subarray(ch, 0, len);
       }
       str = StringCharArrayAccessor.createString(ch);
     } else {
       str = charbuffer.toString();
     }
     return str;
   }
   return null;
 }
Esempio n. 9
0
 @Override
 int read(final TextInput ti) throws IOException {
   int c = -1;
   while (++c < 4) {
     final int ch = ti.readByte();
     if (ch < 0) break;
     cache[c] = (byte) ch;
     outc.position(0);
     inc.position(0);
     inc.limit(c + 1);
     csd.reset();
     final CoderResult cr = csd.decode(inc, outc, true);
     if (cr.isMalformed()) continue;
     // return character
     int i = 0;
     final int os = outc.position();
     for (int o = 0; o < os; ++o) i |= outc.get(o) << (o << 3);
     return i;
   }
   return c == 0 ? -1 : invalid();
 }
 public static void main(String[] args) throws Exception {
   // 创建简体中文对应的Charset
   Charset cn = Charset.forName("GBK");
   // 获取cn对象对应的编码器和解码器
   CharsetEncoder cnEncoder = cn.newEncoder();
   CharsetDecoder cnDecoder = cn.newDecoder();
   // 创建一个CharBuffer对象
   CharBuffer cbuff = CharBuffer.allocate(8);
   cbuff.put('孙');
   cbuff.put('悟');
   cbuff.put('空');
   cbuff.flip();
   // 将CharBuffer中的字符序列转换成字节序列
   ByteBuffer bbuff = cnEncoder.encode(cbuff);
   // 循环访问ByteBuffer中的每个字节
   for (int i = 0; i < bbuff.capacity(); i++) {
     System.out.print(bbuff.get(i) + " ");
   }
   // 将ByteBuffer的数据解码成字符序列
   System.out.println("\n" + cnDecoder.decode(bbuff));
 }
Esempio n. 11
0
 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) throws Exception {
   CharsetDecoder dec = cs.newDecoder();
   ByteBuffer bbf;
   CharBuffer cbf;
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(bb.length);
     cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     bbf.put(bb).flip();
   } else {
     bbf = ByteBuffer.wrap(bb);
     cbf = CharBuffer.allocate(bb.length);
   }
   CoderResult cr = null;
   for (int i = 0; i < iteration; i++) {
     bbf.rewind();
     cbf.clear();
     dec.reset();
     cr = dec.decode(bbf, cbf, true);
   }
   return cr;
 }
  /**
   * Writes any remaining output to the output buffer and resets the converter to its initial state.
   *
   * @param output char array to receive flushed output.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the output to be flushed contained a partial or invalid
   *     multibyte character sequence. flush will write what it can to the output buffer and reset
   *     the converter before throwing this exception. An additional call to flush is not required.
   * @exception ConversionBufferFullException if output array is filled before all the output can be
   *     flushed. flush will write what it can to the output buffer and remember its state. An
   *     additional call to flush with a new output buffer will conclude the operation.
   */
  public int flush(char[] output, int outStart, int outEnd)
      throws MalformedInputException, ConversionBufferFullException {

    byteOff = charOff = 0;
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr = null;
    try {
      if (src != null) cr = decoder.decode((ByteBuffer) src.clear(), dst, true);
      assert !cr.isUnmappable();
      if (cr.isMalformed()) {
        badInputLength = cr.length();
        reset();
        throw new MalformedInputException();
      }
    } catch (IllegalStateException ise) {
      if (src != null) cr = decoder.reset().decode(src, dst, true);
    }
    try {
      cr = decoder.flush(dst);
    } catch (Exception e) {
      assert false;
    } finally {
      byteOff = 0;
      charOff = dst.position();
      src = null;
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) {
      int written = charOff - outStart;
      reset();
      return written;
    }
    assert false;
    return -1; // should be never reached
  }
Esempio n. 13
0
  /**
   * Decode file charset.
   *
   * @param f File to process.
   * @return File charset.
   * @throws IOException in case of error.
   */
  public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {
      Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets)
      if (charsets.containsKey(c)) orderedCharsets.add(charsets.get(c));

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
      FileChannel ch = raf.getChannel();

      ByteBuffer buf = ByteBuffer.allocate(4096);

      ch.read(buf);

      buf.flip();

      for (Charset charset : orderedCharsets) {
        CharsetDecoder decoder = charset.newDecoder();

        decoder.reset();

        try {
          decoder.decode(buf);

          return charset;
        } catch (CharacterCodingException ignored) {
        }
      }
    }

    return Charset.defaultCharset();
  }
  /**
   * Converts an array of bytes containing characters in an external encoding into an array of
   * Unicode characters. This method allows a buffer by buffer conversion of a data stream. The
   * state of the conversion is saved between calls to convert. Among other things, this means
   * multibyte input sequences can be split between calls. If a call to convert results in an
   * exception, the conversion may be continued by calling convert again with suitably modified
   * parameters. All conversions should be finished with a call to the flush method.
   *
   * @return the number of bytes written to output.
   * @param input byte array containing text to be converted.
   * @param inStart begin conversion at this offset in input array.
   * @param inEnd stop conversion at this offset in input array (exclusive).
   * @param output character array to receive conversion result.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the input buffer contains any sequence of bytes that is
   *     illegal for the input character set.
   * @exception UnknownCharacterException for any character that that cannot be converted to
   *     Unicode. Thrown only when converter is not in substitution mode.
   * @exception ConversionBufferFullException if output array is filled prior to converting all the
   *     input.
   */
  public int convert(byte[] input, int inStart, int inEnd, char[] output, int outStart, int outEnd)
      throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException {

    byteOff = inStart;
    charOff = outStart;
    // throw exceptions compatible to legacy ByteToCharXxx converters
    if (inStart >= inEnd) return 0;
    if (inStart >= input.length) throw new ArrayIndexOutOfBoundsException(inStart);
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();

    if (src != null && src.array() == input) src.position(inStart).limit(inEnd);
    else src = ByteBuffer.wrap(input, inStart, inEnd - inStart);
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr;
    try {
      cr = decoder.decode(src, dst, false);
    } catch (IllegalStateException ise) {
      cr = decoder.reset().decode(src, dst, false);
    } finally {
      byteOff = src.position();
      charOff = dst.position();
    }
    if (cr.isUnmappable()) {
      badInputLength = cr.length();
      throw new UnknownCharacterException();
    }
    if (cr.isMalformed()) {
      badInputLength = cr.length();
      throw new MalformedInputException();
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) return charOff - outStart;
    return -1; // should be never reached
  }
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    String bmpStr = new String(bmpCA);
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // getBytes(csn);
    byte[] baSC = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

    // getBytes(cs);
    baSC = bmpStr.getBytes(cs);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(sbBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();

    if (!strNIO.equals(strSC))
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());

    // new String(cs);
    strSC = new String(sbBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());

    // encode unmappable surrogates
    if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) {
      if (cs.name().equals("UTF-8")
          || // utf8 handles surrogates
          cs.name().equals("CESU-8")) // utf8 handles surrogates
      return;
      enc.replaceWith(new byte[] {(byte) 'A'});
      sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc;

      String str = "ab\uD800\uDC00\uD800\uDC00cd";
      byte[] ba = new byte[str.length() - 2];
      int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
        throw new RuntimeException("encode1(surrogates) failed  -> " + cs.name());

      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode2(surrogates) failed  -> " + cs.name());
      str = "ab\uD800B\uDC00Bcd";
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode3(surrogates) failed  -> " + cs.name());
      /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
         invoker (StringCoder) allocates enough output buf, utf8
         and double-byte coder does not check the output buffer limit.
      ba = new byte[str.length() - 1];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
          throw new RuntimeException("encode4(surrogates) failed  -> "
                                     + cs.name());
      }
      */
    }
  }
 ByteToCharConverter(Charset charset, String encoding) {
   super(encoding);
   decoder = charset.newDecoder().onUnmappableCharacter(CodingErrorAction.REPLACE);
   // for compatibility to old ByteToCharASCII converter:
   if (charset.name().equals("US-ASCII")) decoder.onMalformedInput(CodingErrorAction.REPLACE);
 }
Esempio n. 17
0
 // we assume out is large enough for this conversion
 // returns number of filled chars in out buffer
 public int convert(byte[] in, int inOffset, int inLength, char[] out) {
   final ByteBuffer inBuffer = ByteBuffer.wrap(in, inOffset, inLength);
   final CharBuffer outBuffer = CharBuffer.wrap(out, 0, out.length);
   myDecoder.decode(inBuffer, outBuffer, false);
   return outBuffer.position();
 }
 /**
  * Sets converter into substitution mode. In substitution mode, the converter will replace
  * untranslatable characters in the source encoding with the substitution character set by
  * setSubstitionChars. When not in substitution mode, the converter will throw an
  * UnknownCharacterException when it encounters untranslatable input.
  *
  * @param doSub if true, enable substitution mode.
  * @see #setSubstitutionChars
  */
 public void setSubstitutionMode(boolean doSub) {
   super.setSubstitutionMode(doSub);
   if (decoder != null)
     decoder.onUnmappableCharacter(doSub ? CodingErrorAction.REPLACE : CodingErrorAction.REPORT);
 }
 /**
  * Returns the maximum number of characters needed to convert a byte. Useful for calculating the
  * maximum output buffer size needed for a particular input buffer.
  */
 public int getMaxCharsPerByte() {
   if (decoder != null) return (int) Math.ceil(decoder.maxCharsPerByte());
   else // only provided for subclasses
   return 1;
   // Until UTF-16, this will do for every encoding
 }
 /** Resets converter to its initial state. */
 public void reset() {
   super.reset();
   decoder.reset();
   src = null;
   dst = null;
 }
Esempio n. 21
0
 public void reset() {
   myDecoder.reset();
 }