/**
 * Need to check conversion early on. There's some danger that the old
 * contents of a file are set to empty if an exception occurs.
 * 
 * @param memento
 * @param encodingRule
 * @throws java.io.UnsupportedEncodingException
 * @throws MalformedOutputExceptionWithDetail
 * @deprecated - we need to find a "cheaper" way to do this functionality,
 *             so this is likely to go away in the future
 */
private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException {
	String javaEncoding = memento.getJavaCharsetName();
	String detectedEncoding = memento.getDetectedCharsetName();
	Charset charset = Charset.forName(javaEncoding);
	CharsetEncoder charsetEncoder = charset.newEncoder();
	charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
	charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	Reader reader = getResettableReader();
	reader.reset();
	int currentChar = reader.read();
	int currentPos = 1;
	try {
		while (currentChar != -1) {
			// note: this can probably be made more efficient later, to
			// check buffer by buffer instead of character by character
			try {
				boolean canConvert = charsetEncoder.canEncode((char) currentChar);
				if (!canConvert) {
					if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
						// if we're told to ignore the encoding conversion
						// error, notice we still want to detect and log
						// it; we simply don't throw the exception, and we
						// do continue with the save
						Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); //$NON-NLS-1$
					} else {
						throw new MalformedOutputExceptionWithDetail(javaEncoding, detectedEncoding, currentPos);
					}
				}
				currentChar = reader.read();
				currentPos++;
			}
			// IBM's JRE seems to throw NPE when a DBCS char is given to a
			// SBCS charsetEncoder
			catch (NullPointerException e) {
				throw new CharConversionErrorWithDetail(javaEncoding);
			}
		}
		// if we get all the way through the loop without throwing an
		// exception, then there must be an error not detectable when
		// going character by character
		throw new CharConversionErrorWithDetail(javaEncoding);
	} finally {
		reader.reset();
	}
}
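// Illustrative sketch only, not part of the original class: the note in
// checkConversion about checking "buffer by buffer instead of character by
// character" could look roughly like the method below, which drives the
// CharsetEncoder directly and inspects the CoderResult for the offset of
// the first malformed or unmappable character. The method name and the
// "charsReadSoFar" bookkeeping are assumptions; it also assumes
// java.nio.ByteBuffer, java.nio.CharBuffer, and java.nio.charset.CoderResult
// are imported. A surrogate pair split across buffer boundaries is not
// handled here.
private void checkConversionBuffered(EncodingMemento memento, EncodingRule encodingRule) throws IOException {
	String javaEncoding = memento.getJavaCharsetName();
	CharsetEncoder encoder = Charset.forName(javaEncoding).newEncoder();
	encoder.onMalformedInput(CodingErrorAction.REPORT);
	encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	Reader reader = getResettableReader();
	reader.reset();
	char[] buffer = new char[CodedIO.MAX_BUF_SIZE];
	ByteBuffer out = ByteBuffer.allocate(CodedIO.MAX_BUF_SIZE * 4);
	int charsReadSoFar = 0;
	int nRead;
	try {
		while ((nRead = reader.read(buffer, 0, buffer.length)) != -1) {
			CharBuffer in = CharBuffer.wrap(buffer, 0, nRead);
			while (true) {
				out.clear();
				CoderResult result = encoder.encode(in, out, false);
				if (result.isUnderflow()) {
					// this buffer is fully consumed; read more input
					break;
				}
				if (result.isOverflow()) {
					// output buffer full; clear it and keep encoding
					continue;
				}
				// otherwise the input was malformed or unmappable
				if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
					Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); //$NON-NLS-1$
					// skip past the offending characters and continue
					in.position(in.position() + result.length());
				} else {
					// in.position() is the offset of the offending
					// character within this buffer; add the running total
					// for the absolute, 1-based position (matching
					// checkConversion above)
					int errorPos = charsReadSoFar + in.position() + 1;
					throw new MalformedOutputExceptionWithDetail(javaEncoding, memento.getDetectedCharsetName(), errorPos);
				}
			}
			charsReadSoFar += nRead;
		}
		// final encoder flush omitted: we only scan for errors here and
		// discard the encoded bytes
	} finally {
		reader.reset();
	}
}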
/**
 * The primary method, which contains the highest level rules for how to
 * decide the appropriate decoding rules:
 * 1. first check for a unicode stream;
 * 2. then look for an encoding specified in the content (according to the
 * type of content it is ... xml, html, jsp, etc.);
 * 3. then check various settings: file settings first, if null check
 * project settings, if null check user preferences;
 * 4. lastly (or, as the last user preference), use "workbench defaults".
 */
private void analyze() throws CoreException, IOException {
	Reader resettableReader = getResettableReader();
	try {
		if (fCurrentEncodingMemento == null) {
			resettableReader.reset();
			fCurrentEncodingMemento = checkForEncodingInContents();
		}
		// if the encoding strategy doesn't provide an answer, then try
		// file settings, project settings, user preferences, and finally
		// the workbench default
		if (fCurrentEncodingMemento == null || fCurrentEncodingMemento.getDetectedCharsetName() == null) {
			resettableReader.reset();
			fCurrentEncodingMemento = getEncodingMementoFromResourceAndPreference();
		}
		// use DefaultNameRules from NonContentBasedEncodingRules as the
		// final default
		if (fCurrentEncodingMemento == null) {
			handleNotProvidedFromContentCase();
		}
		fHasBeenAnalyzed = true;
	} finally {
		if (resettableReader != null) {
			resettableReader.reset();
		}
	}
}
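// For context, a minimal sketch of the resource/preference cascade that
// getEncodingMementoFromResourceAndPreference presumably walks (the real
// method is not shown in this excerpt; the IFile/IContainer/ResourcesPlugin
// calls below are standard Eclipse resources API, but their use here is an
// assumption for illustration):
//
// String charset = file.getCharset(false); // file-specific setting
// if (charset == null)
// 	charset = file.getParent().getDefaultCharset(false); // folder/project setting
// if (charset == null)
// 	charset = ResourcesPlugin.getEncoding(); // workbench default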
public EncodingMemento getCurrentEncodingMemento() throws CoreException, IOException {
	// Assert.isNotNull(fPreviousEncodingMemento,
	// "previousEncodingMemento needs to be set first");
	if (!fHasBeenAnalyzed) {
		analyze();
	}
	// post condition
	Assert.isNotNull(fCurrentEncodingMemento, "illegal post condition state"); //$NON-NLS-1$
	// be sure to carry over appropriate encoding "state" that may be
	// relevant
	if (fPreviousEncodingMemento != null) {
		fCurrentEncodingMemento.setUTF83ByteBOMUsed(fPreviousEncodingMemento.isUTF83ByteBOMUsed());
	}
	return fCurrentEncodingMemento;
}
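// Illustrative usage sketch (assumed caller, not from the original source):
// the commented-out assert above suggests callers install the previous
// memento before asking for the current one, so the UTF-8 BOM "state"
// carries over on save. The setter name is an assumption.
//
// codedStreamCreator.setPreviousEncodingMemento(previousMemento);
// EncodingMemento current = codedStreamCreator.getCurrentEncodingMemento();
// boolean writeBOM = current.isUTF83ByteBOMUsed();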
private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8) throws CoreException, IOException {
	getCurrentEncodingMemento();
	String javaEncodingName = null;
	if (encodingRule == EncodingRule.CONTENT_BASED) {
		if (fCurrentEncodingMemento.isValid()) {
			javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
		} else {
			throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento);
		}
	} else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
		javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
	} else if (encodingRule == EncodingRule.FORCE_DEFAULT) {
		javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault();
	}
	// write appropriate "header" unicode BOM bytes
	// Note: Java seems to write the appropriate header for UTF-16, but
	// not for UTF-8 nor UTF-16BE. This may vary by JRE version, so it
	// needs to be tested well.
	// Note: javaEncodingName can be null in invalid cases, so we need to
	// skip the whole check in that case.
	if (javaEncodingName != null) {
		if ((javaEncodingName.equals(UTF_8_CHARSET_NAME) && use3ByteBOMifUTF8) || (javaEncodingName.equals(UTF_8_CHARSET_NAME) && fCurrentEncodingMemento.isUTF83ByteBOMUsed())) {
			outputStream.write(UTF3BYTEBOM);
		} else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) {
			outputStream.write(UTF16LEBOM);
		} else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) {
			outputStream.write(UTF16BEBOM);
		}
	}
	// TODO: add back in line delimiter handling the "right" way (updating
	// markers, not requiring a string, etc. ... may need to move to the
	// document level)
	// allTextBuffer = handleLineDelimiter(allTextBuffer, document);
	Reader reader = getResettableReader();
	// be sure to test large "readers" ... we'll need to make sure they
	// all can reset to the initial position (StringReader,
	// CharArrayReader, and DocumentReader should all work ok)
	reader.reset();
	// There must be cleaner logic somehow, but the idea is that
	// javaEncodingName can be null if the original detected encoding is
	// not valid (and if FORCE_DEFAULT was not specified). Hence, we WANT
	// the first Charset.forName to throw an appropriate exception.
	Charset charset = null;
	// this call checks the "override" properties file
	javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName);
	if (javaEncodingName == null) {
		charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName());
	} else {
		charset = Charset.forName(javaEncodingName);
	}
	CharsetEncoder charsetEncoder = charset.newEncoder();
	if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) {
		charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
		charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
	} else {
		charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE);
		charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
	}
	OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder);
	// TODO: this may no longer be needed (and is at least the wrong spot
	// for it).
	// if (checkConversion && (!(encodingRule ==
	// EncodingRule.IGNORE_CONVERSION_ERROR))) {
	// checkConversion(fCurrentEncodingMemento, encodingRule);
	// }
	char[] charbuf = new char[CodedIO.MAX_BUF_SIZE];
	int nRead = 0;
	try {
		while (nRead != -1) {
			nRead = reader.read(charbuf, 0, MAX_BUF_SIZE);
			if (nRead > 0) {
				// note: flushing before each write presumably forces any
				// pending conversion error from previously buffered
				// characters to surface before more input is consumed
				outputStreamWriter.flush();
				outputStreamWriter.write(charbuf, 0, nRead);
			}
		}
	} catch (UnmappableCharacterException e) {
		checkConversion(fCurrentEncodingMemento, encodingRule);
	} finally {
		// since we don't own the original output stream, we won't close
		// it ourselves; the caller who passed it to us must close the
		// original when appropriate (but we do flush, to be sure all is
		// up to date)
		outputStreamWriter.flush();
	}
}
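// For reference (illustrative; the actual constants are defined elsewhere
// in this class hierarchy, presumably in CodedIO): the BOM arrays written
// by dump above would hold the standard byte-order-mark sequences.
//
// private static final byte[] UTF3BYTEBOM = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
// private static final byte[] UTF16LEBOM = new byte[]{(byte) 0xFF, (byte) 0xFE};
// private static final byte[] UTF16BEBOM = new byte[]{(byte) 0xFE, (byte) 0xFF};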