/** * Need to check conversion early on. There's some danger than old contents of a file are set to * empty, if an exception occurs. * * @param allText * @param encoding * @param encodingRule * @throws java.io.UnsupportedEncodingException * @throws MalformedOutputExceptionWithDetail * @deprecated - we need to find "cheaper" way to to this functionality so likely to go away in * future */ private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException { String javaEncoding = memento.getJavaCharsetName(); String detectedEncoding = memento.getDetectedCharsetName(); Charset charset = Charset.forName(javaEncoding); CharsetEncoder charsetEncoder = charset.newEncoder(); charsetEncoder.onMalformedInput(CodingErrorAction.REPORT); charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT); Reader reader = getResettableReader(); reader.reset(); int currentChar = reader.read(); int currentPos = 1; try { while (currentChar != -1) { // note: this can probably be made more // efficient later to // check buffer by buffer, instead of // character by character. try { boolean canConvert = charsetEncoder.canEncode((char) currentChar); if (!canConvert) { if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) { // if we're told to ignore the // encoding conversion // error, // notice we still want to detect // and log it. We simply // don't throw the exception, and // we do continue with // the // save. 
Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); // $NON-NLS-1$ } else { throw new MalformedOutputExceptionWithDetail( javaEncoding, detectedEncoding, currentPos); } } currentChar = reader.read(); currentPos++; } // IBM's JRE seems to throw NPE when DBCS char is given to // SBCS charsetEncoder catch (NullPointerException e) { throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$ } } // if we get all the way through loop without throwing exception, // then there must // be an error not detectable when going character by character. throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$ } finally { reader.reset(); } }
/**
 * Converts the provided String to bytes using the encoder obtained from
 * {@code ENCODER_FACTORY} (documented by the original author as UTF-8).
 *
 * <p>If {@code replace} is true, the encoder is switched to
 * {@link CodingErrorAction#REPLACE} for both malformed input and unmappable characters for the
 * duration of this call, and switched back to {@link CodingErrorAction#REPORT} afterwards. The
 * switch back happens in a {@code finally} block so that a failure while encoding cannot leave
 * the encoder in replacing mode for later strict callers (NOTE(review): this matters if
 * {@code ENCODER_FACTORY} hands out a reused encoder, which the name suggests - confirm).
 *
 * <p>If {@code replace} is false, the encoder's current error actions are left untouched;
 * presumably these are REPORT, in which case bad input raises a
 * {@link CharacterCodingException} such as {@code MalformedInputException}.
 *
 * @param string the text to encode; must not be null
 * @param replace whether bad input is substituted instead of reported
 * @return ByteBuffer: bytes are stored at ByteBuffer.array() and the length is
 *     ByteBuffer.limit()
 * @throws CharacterCodingException if the encoder reports an encoding error
 */
public static ByteBuffer encode(String string, boolean replace)
    throws CharacterCodingException {
  CharsetEncoder encoder = ENCODER_FACTORY.get();
  if (replace) {
    encoder.onMalformedInput(CodingErrorAction.REPLACE);
    encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  try {
    return encoder.encode(CharBuffer.wrap(string.toCharArray()));
  } finally {
    if (replace) {
      encoder.onMalformedInput(CodingErrorAction.REPORT);
      encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    }
  }
}
/**
 * Constructs a new OutputStreamWriter that writes converted bytes to {@code out}, encoding
 * characters with a fresh encoder obtained from {@code cs}. The encoder is configured to
 * replace, rather than report, malformed input and unmappable characters.
 *
 * @param out the target stream to write converted bytes to.
 * @param cs the {@code Charset} that specifies the character encoding.
 */
public OutputStreamWriter(OutputStream out, Charset cs) {
  super(out);
  this.out = out;
  encoder =
      cs.newEncoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Constructs a new OutputStreamWriter that writes converted bytes to {@code out} using the
 * default character encoding.
 *
 * <p>The encoding name is obtained by running a {@code PriviAction} built from
 * {@code "file.encoding"} and {@code "ISO8859_1"} under
 * {@code AccessController.doPrivileged} (presumably the system property to read and the
 * fallback value - confirm against {@code PriviAction}). The resulting encoder replaces,
 * rather than reports, malformed input and unmappable characters.
 *
 * @param out the non-null target stream to write converted bytes to.
 */
public OutputStreamWriter(OutputStream out) {
  super(out);
  this.out = out;
  PriviAction<String> encodingLookup =
      new PriviAction<String>("file.encoding", "ISO8859_1"); //$NON-NLS-1$ //$NON-NLS-2$
  String defaultEncoding = AccessController.doPrivileged(encodingLookup);
  encoder =
      Charset.forName(defaultEncoding)
          .newEncoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Creates a new encoder for the named charset that substitutes the encoder's replacement bytes
 * for malformed input and unmappable characters instead of reporting them.
 *
 * @param charsetName a charset name accepted by {@link Charset#forName(String)}
 * @return a newly created encoder with both error actions set to
 *     {@link CodingErrorAction#REPLACE}
 */
public static CharsetEncoder getCharsetEncoder(String charsetName) {
  return Charset.forName(charsetName)
      .newEncoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/** * Write String into byte array * * <p>It will remove a trailing null terminator if exists if the option * RemoveTrailingTerminatorOnWrite has been set. * * @return the data as a byte array in format to write to file */ public byte[] writeByteArray() { byte[] data; // Try and write to buffer using the CharSet defined by getTextEncodingCharSet() String charSetName = getTextEncodingCharSet(); try { stripTrailingNull(); // Special Handling because there is no UTF16 BOM LE charset String stringValue = (String) value; String actualCharSet = null; if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) { if (TagOptionSingleton.getInstance().isEncodeUTF16BomAsLittleEndian()) { actualCharSet = TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT; } else { actualCharSet = TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT; } } // Ensure large enough for any encoding ByteBuffer outputBuffer = ByteBuffer.allocate((stringValue.length() + 3) * 3); // Ensure each string (if multiple values) is written with BOM by writing separately List<String> values = splitByNullSeperator(stringValue); checkTrailingNull(values, stringValue); // For each value for (int i = 0; i < values.size(); i++) { String next = values.get(i); if (actualCharSet != null) { if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16LEBOM(next, i, values.size())); } else if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16BEBOM(next, i, values.size())); } } else { CharsetEncoder charsetEncoder = Charset.forName(charSetName).newEncoder(); charsetEncoder.onMalformedInput(CodingErrorAction.IGNORE); charsetEncoder.onUnmappableCharacter(CodingErrorAction.IGNORE); outputBuffer.put(writeString(charsetEncoder, next, i, values.size())); } } outputBuffer.flip(); data = new byte[outputBuffer.limit()]; outputBuffer.rewind(); outputBuffer.get(data, 0, outputBuffer.limit()); setSize(data.length); } // 
https://bitbucket.org/ijabz/jaudiotagger/issue/1/encoding-metadata-to-utf-16-can-fail-if catch (CharacterCodingException ce) { logger.severe(ce.getMessage() + ":" + charSetName + ":" + value); throw new RuntimeException(ce); } return data; }
/**
 * Initializes this converter from a target encoding ({@code arg1}) and a source encoding
 * ({@code arg2}).
 *
 * <p>Both arguments must respond to {@code to_str}; otherwise a Ruby TypeError is raised. A
 * decoder is created for the source charset and an encoder for the target charset, and the
 * counter is reset to zero. When {@code isIgnore} accepts the target specification, both
 * coders ignore unmappable characters and malformed input; otherwise they report them.
 *
 * @param arg1 the target ("to") encoding specification
 * @param arg2 the source ("from") encoding specification
 * @param unusedBlock not used
 * @return this object
 */
@JRubyMethod
public IRubyObject initialize(IRubyObject arg1, IRubyObject arg2, Block unusedBlock) {
  Ruby runtime = getRuntime();

  // Validate in argument order so that a bad first argument is the one that gets reported.
  for (IRubyObject arg : new IRubyObject[] {arg1, arg2}) {
    if (!arg.respondsTo("to_str")) {
      throw runtime.newTypeError("can't convert " + arg.getMetaClass() + " into String");
    }
  }

  String to = arg1.convertToString().toString();
  String from = arg2.convertToString().toString();

  try {
    fromEncoding = Charset.forName(getCharset(from)).newDecoder();
    toEncoding = Charset.forName(getCharset(to)).newEncoder();
    count = 0;

    // One action governs all four settings: ignore everything or report everything.
    CodingErrorAction onError =
        isIgnore(to) ? CodingErrorAction.IGNORE : CodingErrorAction.REPORT;
    fromEncoding.onUnmappableCharacter(onError);
    fromEncoding.onMalformedInput(onError);
    toEncoding.onUnmappableCharacter(onError);
    toEncoding.onMalformedInput(onError);
  } catch (IllegalCharsetNameException e) {
    throw runtime.newInvalidEncoding("invalid encoding");
  } catch (UnsupportedCharsetException e) {
    throw runtime.newInvalidEncoding("invalid encoding");
  } catch (Exception e) {
    throw runtime.newSystemCallError(e.toString());
  }

  return this;
}
/**
 * Constructs a new OutputStreamWriter using {@code out} as the target stream to write converted
 * characters to and {@code enc} as the character encoding. If an encoder for that encoding
 * cannot be created, an UnsupportedEncodingException is thrown. The encoder replaces, rather
 * than reports, malformed input and unmappable characters.
 *
 * @param out the target stream to write converted bytes to.
 * @param enc the string describing the desired character encoding.
 * @throws NullPointerException if {@code enc} is {@code null}.
 * @throws UnsupportedEncodingException if no encoder can be obtained for the encoding
 *     specified by {@code enc}; the underlying failure is attached as the cause.
 */
public OutputStreamWriter(OutputStream out, final String enc)
    throws UnsupportedEncodingException {
  super(out);
  if (enc == null) {
    throw new NullPointerException();
  }
  this.out = out;
  try {
    encoder = Charset.forName(enc).newEncoder();
  } catch (Exception e) {
    // UnsupportedEncodingException has no cause-taking constructor, so the original failure
    // (illegal name, unsupported charset, charset without an encoder, ...) is attached via
    // initCause instead of being discarded.
    UnsupportedEncodingException unsupported = new UnsupportedEncodingException(enc);
    unsupported.initCause(e);
    throw unsupported;
  }
  encoder.onMalformedInput(CodingErrorAction.REPLACE);
  encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/** * Write String in UTF-BEBOM format * * <p>When this is called multiple times, all but the last value has a trailing null * * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeStringUTF16BEBOM(String next, int i, int noOfValues) throws CharacterCodingException { CharsetEncoder encoder = Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT).newEncoder(); encoder.onMalformedInput(CodingErrorAction.IGNORE); encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); ByteBuffer bb = null; // Add BOM if ((i + 1) == noOfValues) { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next)); } else { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next + '\0')); } bb.rewind(); return bb; }
/** * Creates ReaderInputStream from a given Reader type object with a specifed encoding. * * @param reader java.io.Reader object to be read data from. * @param encoding an encoding of the created stream. */ public ReaderInputStream(Reader reader, String encoding) { this.reader = reader; if (encoding == null) { if (reader instanceof InputStreamReader) { encoding = ((InputStreamReader) reader).getEncoding(); } else { encoding = Charset.defaultCharset().name(); } } else if (!Charset.isSupported(encoding)) { throw new IllegalArgumentException(encoding + " is not supported"); } encoder = Charset.forName(encoding).newEncoder(); encoder.onMalformedInput(CodingErrorAction.REPLACE); encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); try { fillByteBuffer(reader); } catch (IOException ex) { throw new RuntimeException(ex); } }
/**
 * Creates a terminal session: allocates the write-side conversion buffers and UTF-8 encoder,
 * the receive buffer and byte queues, and constructs (but does not start) the reader and
 * writer threads.
 *
 * <p>Neither thread is started in this constructor, and {@code mTermIn}, {@code mTermOut} and
 * {@code mMsgHandler} are only referenced from the thread bodies, not assigned here.
 *
 * @param exitOnEOF if true, the reader thread posts an {@code EOF} message to
 *     {@code mMsgHandler} once its read loop ends
 */
public TermSession(final boolean exitOnEOF) {
  mWriteCharBuffer = CharBuffer.allocate(2);
  mWriteByteBuffer = ByteBuffer.allocate(4);
  // Bad input is replaced rather than reported by this encoder.
  mUTF8Encoder = Charset.forName("UTF-8").newEncoder();
  mUTF8Encoder.onMalformedInput(CodingErrorAction.REPLACE);
  mUTF8Encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  mReceiveBuffer = new byte[4 * 1024];
  mByteQueue = new ByteQueue(4 * 1024);
  // Reader thread: copies bytes from mTermIn into mByteQueue and posts NEW_INPUT messages.
  mReaderThread =
      new Thread() {
        private byte[] mBuffer = new byte[4096];

        @Override
        public void run() {
          try {
            while (true) {
              int read = mTermIn.read(mBuffer);
              if (read == -1) {
                // EOF -- process exited
                break;
              }
              int offset = 0;
              // The queue write may accept fewer bytes than offered, so keep writing the
              // remainder; a NEW_INPUT message is posted after every partial write.
              while (read > 0) {
                int written = mByteQueue.write(mBuffer, offset, read);
                offset += written;
                read -= written;
                mMsgHandler.sendMessage(mMsgHandler.obtainMessage(NEW_INPUT));
              }
            }
          } catch (IOException e) {
            // Deliberately empty: an I/O failure simply ends the read loop.
          } catch (InterruptedException e) {
            // Deliberately empty: interruption simply ends the read loop.
            // NOTE(review): the interrupt status is not restored here.
          }
          // Reached on EOF and after either exception above.
          if (exitOnEOF) mMsgHandler.sendMessage(mMsgHandler.obtainMessage(EOF));
        }
      };
  mReaderThread.setName("TermSession input reader");
  mWriteQueue = new ByteQueue(4096);
  // Writer thread: runs a Looper whose handler drains mWriteQueue into mTermOut on demand.
  mWriterThread =
      new Thread() {
        private byte[] mBuffer = new byte[4096];

        @Override
        public void run() {
          Looper.prepare();
          // The handler is created on this thread, after Looper.prepare(), so messages sent
          // to it are dispatched by the Looper.loop() call below.
          mWriterHandler =
              new Handler() {
                @Override
                public void handleMessage(Message msg) {
                  if (msg.what == NEW_OUTPUT) {
                    writeToOutput();
                  } else if (msg.what == FINISH) {
                    Looper.myLooper().quit();
                  }
                }
              };
          // Drain anything in the queue from before we started
          writeToOutput();
          Looper.loop();
        }

        /**
         * Moves up to one buffer's worth of the bytes currently available in the write queue
         * to the terminal output stream and flushes it; does nothing if the queue is empty.
         */
        private void writeToOutput() {
          ByteQueue writeQueue = mWriteQueue;
          byte[] buffer = mBuffer;
          OutputStream termOut = mTermOut;
          int bytesAvailable = writeQueue.getBytesAvailable();
          int bytesToWrite = Math.min(bytesAvailable, buffer.length);
          if (bytesToWrite == 0) {
            return;
          }
          try {
            writeQueue.read(buffer, 0, bytesToWrite);
            termOut.write(buffer, 0, bytesToWrite);
            termOut.flush();
          } catch (IOException e) {
            // Ignore exception
            // We don't really care if the receiver isn't listening.
            // We just make a best effort to answer the query.
            e.printStackTrace();
          } catch (InterruptedException e) {
            // Best effort as well: the interruption is only printed, not propagated.
            e.printStackTrace();
          }
        }
      };
  mWriterThread.setName("TermSession output writer");
}
@Override protected void doSaveDocument( IProgressMonitor monitor, Object element, IDocument document, boolean overwrite) throws CoreException { try { IStorage storage = EditorUtils.getStorageFromInput(element); File localFile = null; if (storage == null) { localFile = EditorUtils.getLocalFileFromInput(element); if (localFile == null) { throw new DBException("Can't obtain file from editor input"); } } String encoding = (storage instanceof IEncodedStorage ? ((IEncodedStorage) storage).getCharset() : GeneralUtils.DEFAULT_FILE_CHARSET_NAME); Charset charset = Charset.forName(encoding); CharsetEncoder encoder = charset.newEncoder(); encoder.onMalformedInput(CodingErrorAction.REPLACE); encoder.onUnmappableCharacter(CodingErrorAction.REPORT); byte[] bytes; ByteBuffer byteBuffer = encoder.encode(CharBuffer.wrap(document.get())); if (byteBuffer.hasArray()) { bytes = byteBuffer.array(); } else { bytes = new byte[byteBuffer.limit()]; byteBuffer.get(bytes); } InputStream stream = new ByteArrayInputStream(bytes, 0, byteBuffer.limit()); if (storage instanceof IFile) { IFile file = (IFile) storage; if (file.exists()) { // inform about the upcoming content change fireElementStateChanging(element); try { file.setContents(stream, true, true, monitor); } catch (CoreException x) { // inform about failure fireElementStateChangeFailed(element); throw x; } catch (RuntimeException x) { // inform about failure fireElementStateChangeFailed(element); throw x; } } else { try { monitor.beginTask("Save file '" + file.getName() + "'", 2000); // ContainerCreator creator = new ContainerCreator(file.getWorkspace(), // file.getParent().getFullPath()); // creator.createContainer(new SubProgressMonitor(monitor, 1000)); file.create(stream, false, monitor); } finally { monitor.done(); } } } else if (storage instanceof IPersistentStorage) { monitor.beginTask("Save document", 1); ((IPersistentStorage) storage).setContents(monitor, stream); } else if (localFile != null) { try (OutputStream os = new 
FileOutputStream(localFile)) { IOUtils.copyStream(stream, os); } } else { throw new DBException("Storage [" + storage + "] doesn't support save"); } } catch (Exception e) { if (e instanceof CoreException) { throw (CoreException) e; } else { throw new CoreException(GeneralUtils.makeExceptionStatus(e)); } } }
/**
 * Creates an instance that reads bytes from {@code delegate} representing characters in the
 * {@code from} character set and returns them to the caller as characters in the {@code to}
 * character set. Any character that cannot be represented in the {@code to} character set is
 * replaced by {@code replacement}.
 *
 * @param delegate the stream supplying bytes in the {@code from} character set
 * @param from the character set of the incoming bytes
 * @param to the character set of the bytes handed to the caller
 * @param replacement the character substituted for unmappable characters, encoded through
 *     {@code encodeReplacement} for the {@code to} character set
 */
public TranslatingInputStream(InputStream delegate, Charset from, Charset to, char replacement) {
  this(delegate, from, to);
  // Install the substitute bytes first, then switch the encoder to use them.
  _encoder
      .replaceWith(encodeReplacement(to, replacement))
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8) throws CoreException, IOException { getCurrentEncodingMemento(); String javaEncodingName = null; if (encodingRule == EncodingRule.CONTENT_BASED) { if (fCurrentEncodingMemento.isValid()) { javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName(); } else { throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento); } } else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName(); else if (encodingRule == EncodingRule.FORCE_DEFAULT) javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault(); // write appropriate "header" unicode BOM bytes // Note: Java seems to write appropriate header for // UTF-16, but not // UTF-8 nor UTF-16BE. This // may vary by JRE version, so need to test well. // Note: javaEncodingName can be null in invalid // cases, so we no hard // to skip whole check if that's the case. if (javaEncodingName != null) { if ((javaEncodingName.equals(UTF_8_CHARSET_NAME) && use3ByteBOMifUTF8) || (javaEncodingName.equals(UTF_8_CHARSET_NAME) && fCurrentEncodingMemento.isUTF83ByteBOMUsed())) { outputStream.write(UTF3BYTEBOM); } else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) { outputStream.write(UTF16LEBOM); } else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) { outputStream.write(UTF16BEBOM); } } // TODO add back in line delimiter handling the // "right" way (updating // markers, not requiring string, etc. .. may need // to move to document // level) // allTextBuffer = // handleLineDelimiter(allTextBuffer, document); Reader reader = getResettableReader(); // be sure to test large "readers" ... we'll need // to make sure they all // can reset to initial position (StringReader, // CharArrayReader, and // DocumentReader should all work ok). 
reader.reset(); // There must be cleaner logic somehow, but the // idea is that // javaEncodingName can be null // if original detected encoding is not valid (and // if FORCE_DEFAULT was // not specified). Hence, we WANT the first // Charset.forName to // throw appropriate exception. Charset charset = null; // this call checks "override" properties file javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName); if (javaEncodingName == null) { charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName()); } else { charset = Charset.forName(javaEncodingName); } CharsetEncoder charsetEncoder = charset.newEncoder(); if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) { charsetEncoder.onMalformedInput(CodingErrorAction.REPORT); charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT); } else { charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE); charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder); // TODO: this may no longer be needed (and is at // least wrong spot for // it). // if (checkConversion && (!(encodingRule == // EncodingRule.IGNORE_CONVERSION_ERROR))) { // checkConversion(fCurrentEncodingMemento, // encodingRule); // } char[] charbuf = new char[CodedIO.MAX_BUF_SIZE]; int nRead = 0; try { while (nRead != -1) { nRead = reader.read(charbuf, 0, MAX_BUF_SIZE); if (nRead > 0) { outputStreamWriter.flush(); outputStreamWriter.write(charbuf, 0, nRead); } } } catch (UnmappableCharacterException e) { checkConversion(fCurrentEncodingMemento, encodingRule); } finally { // since we don't own the original output stream, we // won't close it ours. // the caller who passed it to us must close original one // when appropriate. // (but we do flush to be sure all up-to-date) outputStreamWriter.flush(); } }