public static void main(String args[]) throws Exception { String inputFile = "samplein.txt"; String outputFile = "sampleout.txt"; RandomAccessFile inf = new RandomAccessFile(inputFile, "r"); RandomAccessFile outf = new RandomAccessFile(outputFile, "rw"); long inputLength = new File(inputFile).length(); FileChannel inc = inf.getChannel(); FileChannel outc = outf.getChannel(); MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength); Charset latin1 = Charset.forName("ISO-8859-1"); CharsetDecoder decoder = latin1.newDecoder(); CharsetEncoder encoder = latin1.newEncoder(); CharBuffer cb = decoder.decode(inputData); // Process char data here ByteBuffer outputData = encoder.encode(cb); outc.write(outputData); inf.close(); outf.close(); }
public static void main(String[] arguments) { try { // read byte data into a byte buffer String data = "friends.dat"; FileInputStream inData = new FileInputStream(data); FileChannel inChannel = inData.getChannel(); long inSize = inChannel.size(); ByteBuffer source = ByteBuffer.allocate((int) inSize); inChannel.read(source, 0); source.position(0); System.out.println("Original byte data:"); for (int i = 0; source.remaining() > 0; i++) { System.out.print(source.get() + " "); } // convert byte data into character data source.position(0); Charset ascii = Charset.forName("US-ASCII"); CharsetDecoder toAscii = ascii.newDecoder(); CharBuffer destination = toAscii.decode(source); destination.position(0); System.out.println("\n\nNew character data:"); for (int i = 0; destination.remaining() > 0; i++) { System.out.print(destination.get()); } System.out.println(); } catch (FileNotFoundException fne) { System.out.println(fne.getMessage()); } catch (IOException ioe) { System.out.println(ioe.getMessage()); } }
@Override int read(final TextInput ti) throws IOException { int c = -1; while (++c < 4) { final int ch = ti.readByte(); if (ch < 0) break; cache[c] = (byte) ch; outc.position(0); inc.position(0); inc.limit(c + 1); csd.reset(); final CoderResult cr = csd.decode(inc, outc, true); if (cr.isMalformed()) continue; // return character int i = 0; final int os = outc.position(); for (int o = 0; o < os; ++o) i |= outc.get(o) << (o << 3); return i; } return c == 0 ? -1 : invalid(); }
/** * Writes any remaining output to the output buffer and resets the converter to its initial state. * * @param output char array to receive flushed output. * @param outStart start writing to output array at this offset. * @param outEnd stop writing to output array at this offset (exclusive). * @exception MalformedInputException if the output to be flushed contained a partial or invalid * multibyte character sequence. flush will write what it can to the output buffer and reset * the converter before throwing this exception. An additional call to flush is not required. * @exception ConversionBufferFullException if output array is filled before all the output can be * flushed. flush will write what it can to the output buffer and remember its state. An * additional call to flush with a new output buffer will conclude the operation. */ public int flush(char[] output, int outStart, int outEnd) throws MalformedInputException, ConversionBufferFullException { byteOff = charOff = 0; if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException(); if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd); else dst = CharBuffer.wrap(output, outStart, outEnd - outStart); CoderResult cr = null; try { if (src != null) cr = decoder.decode((ByteBuffer) src.clear(), dst, true); assert !cr.isUnmappable(); if (cr.isMalformed()) { badInputLength = cr.length(); reset(); throw new MalformedInputException(); } } catch (IllegalStateException ise) { if (src != null) cr = decoder.reset().decode(src, dst, true); } try { cr = decoder.flush(dst); } catch (Exception e) { assert false; } finally { byteOff = 0; charOff = dst.position(); src = null; } if (cr.isOverflow()) throw new ConversionBufferFullException(); // Return the length written to the output buffer if (cr.isUnderflow()) { int written = charOff - outStart; reset(); return written; } assert false; return -1; // should be never reached }
/** * Decode file charset. * * @param f File to process. * @return File charset. * @throws IOException in case of error. */ public static Charset decode(File f) throws IOException { SortedMap<String, Charset> charsets = Charset.availableCharsets(); String[] firstCharsets = { Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE" }; Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size()); for (String c : firstCharsets) if (charsets.containsKey(c)) orderedCharsets.add(charsets.get(c)); orderedCharsets.addAll(charsets.values()); try (RandomAccessFile raf = new RandomAccessFile(f, "r")) { FileChannel ch = raf.getChannel(); ByteBuffer buf = ByteBuffer.allocate(4096); ch.read(buf); buf.flip(); for (Charset charset : orderedCharsets) { CharsetDecoder decoder = charset.newDecoder(); decoder.reset(); try { decoder.decode(buf); return charset; } catch (CharacterCodingException ignored) { } } } return Charset.defaultCharset(); }
/** * Converts an array of bytes containing characters in an external encoding into an array of * Unicode characters. This method allows a buffer by buffer conversion of a data stream. The * state of the conversion is saved between calls to convert. Among other things, this means * multibyte input sequences can be split between calls. If a call to convert results in an * exception, the conversion may be continued by calling convert again with suitably modified * parameters. All conversions should be finished with a call to the flush method. * * @return the number of bytes written to output. * @param input byte array containing text to be converted. * @param inStart begin conversion at this offset in input array. * @param inEnd stop conversion at this offset in input array (exclusive). * @param output character array to receive conversion result. * @param outStart start writing to output array at this offset. * @param outEnd stop writing to output array at this offset (exclusive). * @exception MalformedInputException if the input buffer contains any sequence of bytes that is * illegal for the input character set. * @exception UnknownCharacterException for any character that that cannot be converted to * Unicode. Thrown only when converter is not in substitution mode. * @exception ConversionBufferFullException if output array is filled prior to converting all the * input. */ public int convert(byte[] input, int inStart, int inEnd, char[] output, int outStart, int outEnd) throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException { byteOff = inStart; charOff = outStart; // throw exceptions compatible to legacy ByteToCharXxx converters if (inStart >= inEnd) return 0; if (inStart >= input.length) throw new ArrayIndexOutOfBoundsException(inStart); if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException(); if (src != null && src.array() == input) src.position(inStart).limit(inEnd); else src = ByteBuffer.wrap(input, inStart, inEnd - inStart); if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd); else dst = CharBuffer.wrap(output, outStart, outEnd - outStart); CoderResult cr; try { cr = decoder.decode(src, dst, false); } catch (IllegalStateException ise) { cr = decoder.reset().decode(src, dst, false); } finally { byteOff = src.position(); charOff = dst.position(); } if (cr.isUnmappable()) { badInputLength = cr.length(); throw new UnknownCharacterException(); } if (cr.isMalformed()) { badInputLength = cr.length(); throw new MalformedInputException(); } if (cr.isOverflow()) throw new ConversionBufferFullException(); // Return the length written to the output buffer if (cr.isUnderflow()) return charOff - outStart; return -1; // should be never reached }