Beispiel #1
0
 @Override
 int read(final TextInput ti) throws IOException {
   int c = -1;
   while (++c < 4) {
     final int ch = ti.readByte();
     if (ch < 0) break;
     cache[c] = (byte) ch;
     outc.position(0);
     inc.position(0);
     inc.limit(c + 1);
     csd.reset();
     final CoderResult cr = csd.decode(inc, outc, true);
     if (cr.isMalformed()) continue;
     // return character
     int i = 0;
     final int os = outc.position();
     for (int o = 0; o < os; ++o) i |= outc.get(o) << (o << 3);
     return i;
   }
   return c == 0 ? -1 : invalid();
 }
  /**
   * Writes any remaining output to the output buffer and resets the converter to its initial state.
   *
   * @param output char array to receive flushed output.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the output to be flushed contained a partial or invalid
   *     multibyte character sequence. flush will write what it can to the output buffer and reset
   *     the converter before throwing this exception. An additional call to flush is not required.
   * @exception ConversionBufferFullException if output array is filled before all the output can be
   *     flushed. flush will write what it can to the output buffer and remember its state. An
   *     additional call to flush with a new output buffer will conclude the operation.
   */
  public int flush(char[] output, int outStart, int outEnd)
      throws MalformedInputException, ConversionBufferFullException {

    byteOff = charOff = 0;
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr = null;
    try {
      if (src != null) cr = decoder.decode((ByteBuffer) src.clear(), dst, true);
      assert !cr.isUnmappable();
      if (cr.isMalformed()) {
        badInputLength = cr.length();
        reset();
        throw new MalformedInputException();
      }
    } catch (IllegalStateException ise) {
      if (src != null) cr = decoder.reset().decode(src, dst, true);
    }
    try {
      cr = decoder.flush(dst);
    } catch (Exception e) {
      assert false;
    } finally {
      byteOff = 0;
      charOff = dst.position();
      src = null;
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) {
      int written = charOff - outStart;
      reset();
      return written;
    }
    assert false;
    return -1; // should be never reached
  }
  /**
   * Decode file charset.
   *
   * @param f File to process.
   * @return File charset.
   * @throws IOException in case of error.
   */
  public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {
      Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets)
      if (charsets.containsKey(c)) orderedCharsets.add(charsets.get(c));

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
      FileChannel ch = raf.getChannel();

      ByteBuffer buf = ByteBuffer.allocate(4096);

      ch.read(buf);

      buf.flip();

      for (Charset charset : orderedCharsets) {
        CharsetDecoder decoder = charset.newDecoder();

        decoder.reset();

        try {
          decoder.decode(buf);

          return charset;
        } catch (CharacterCodingException ignored) {
        }
      }
    }

    return Charset.defaultCharset();
  }
  /**
   * Converts an array of bytes containing characters in an external encoding into an array of
   * Unicode characters. This method allows a buffer by buffer conversion of a data stream. The
   * state of the conversion is saved between calls to convert. Among other things, this means
   * multibyte input sequences can be split between calls. If a call to convert results in an
   * exception, the conversion may be continued by calling convert again with suitably modified
   * parameters. All conversions should be finished with a call to the flush method.
   *
   * @return the number of bytes written to output.
   * @param input byte array containing text to be converted.
   * @param inStart begin conversion at this offset in input array.
   * @param inEnd stop conversion at this offset in input array (exclusive).
   * @param output character array to receive conversion result.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the input buffer contains any sequence of bytes that is
   *     illegal for the input character set.
   * @exception UnknownCharacterException for any character that that cannot be converted to
   *     Unicode. Thrown only when converter is not in substitution mode.
   * @exception ConversionBufferFullException if output array is filled prior to converting all the
   *     input.
   */
  public int convert(byte[] input, int inStart, int inEnd, char[] output, int outStart, int outEnd)
      throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException {

    byteOff = inStart;
    charOff = outStart;
    // throw exceptions compatible to legacy ByteToCharXxx converters
    if (inStart >= inEnd) return 0;
    if (inStart >= input.length) throw new ArrayIndexOutOfBoundsException(inStart);
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();

    if (src != null && src.array() == input) src.position(inStart).limit(inEnd);
    else src = ByteBuffer.wrap(input, inStart, inEnd - inStart);
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr;
    try {
      cr = decoder.decode(src, dst, false);
    } catch (IllegalStateException ise) {
      cr = decoder.reset().decode(src, dst, false);
    } finally {
      byteOff = src.position();
      charOff = dst.position();
    }
    if (cr.isUnmappable()) {
      badInputLength = cr.length();
      throw new UnknownCharacterException();
    }
    if (cr.isMalformed()) {
      badInputLength = cr.length();
      throw new MalformedInputException();
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) return charOff - outStart;
    return -1; // should be never reached
  }
 /** Resets converter to its initial state. */
 public void reset() {
   super.reset();
   decoder.reset();
   src = null;
   dst = null;
 }