コード例 #1
1
 /**
  * Re-scans the document text to find the first character that cannot be encoded in the memento's
  * Java charset, so that a conversion failure can be reported together with its position.
  *
  * <p>Need to check conversion early on. There's some danger that old contents of a file are set
  * to empty, if an exception occurs.
  *
  * <p>The text is checked one code point at a time. A high surrogate immediately followed by a low
  * surrogate is checked as a pair, because {@code CharsetEncoder.canEncode(char)} always answers
  * {@code false} for a lone surrogate and would otherwise flag every supplementary character as
  * unencodable, even in charsets such as UTF-8 that can represent it.
  *
  * <p>This method never returns normally: if no individual character is rejected, it still throws
  * {@code CharConversionErrorWithDetail}, on the assumption that it is only called after a
  * conversion has already failed. The reader is reset before the scan and again when the scan
  * ends, whether normally or exceptionally.
  *
  * @param memento supplies the Java charset name used for the check and the detected charset name
  *     included in the error detail
  * @param encodingRule when {@code EncodingRule.IGNORE_CONVERSION_ERROR}, an unencodable character
  *     is logged instead of raising {@code MalformedOutputExceptionWithDetail}
  * @throws IOException if reading fails, or (presumably as subclasses) {@code
  *     MalformedOutputExceptionWithDetail} carrying the 1-based position of the first unencodable
  *     character, or {@code CharConversionErrorWithDetail} when no such character is found
  * @deprecated - we need to find "cheaper" way to do this functionality so likely to go away in
  *     future
  */
 private void checkConversion(EncodingMemento memento, EncodingRule encodingRule)
     throws IOException {
   String javaEncoding = memento.getJavaCharsetName();
   String detectedEncoding = memento.getDetectedCharsetName();
   Charset charset = Charset.forName(javaEncoding);
   CharsetEncoder charsetEncoder = charset.newEncoder();
   charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
   charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
   Reader reader = getResettableReader();
   reader.reset();
   int currentChar = reader.read();
   int currentPos = 1;
   try {
     while (currentChar != -1) {
       // note: this can probably be made more efficient later to check buffer by buffer,
       // instead of character by character.
       try {
         char current = (char) currentChar;
         int charsConsumed = 1;
         // Set when a char was read ahead but turned out not to belong to the current one.
         boolean haveLookahead = false;
         int lookahead = -1;
         boolean canConvert;
         if (Character.isHighSurrogate(current)) {
           lookahead = reader.read();
           if (lookahead != -1 && Character.isLowSurrogate((char) lookahead)) {
             // Well-formed surrogate pair: ask about the whole code point.
             String pair = new String(new char[] {current, (char) lookahead});
             canConvert = charsetEncoder.canEncode(pair);
             charsConsumed = 2;
           } else {
             // Unpaired surrogate: canEncode(char) reports it as unencodable.
             canConvert = charsetEncoder.canEncode(current);
             haveLookahead = true;
           }
         } else {
           canConvert = charsetEncoder.canEncode(current);
         }
         if (!canConvert) {
           if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
             // if we're told to ignore the encoding conversion error, notice we still want
             // to detect and log it. We simply don't throw the exception, and we do continue
             // with the save.
             Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); // $NON-NLS-1$
           } else {
             throw new MalformedOutputExceptionWithDetail(
                 javaEncoding, detectedEncoding, currentPos);
           }
         }
         currentChar = haveLookahead ? lookahead : reader.read();
         currentPos += charsConsumed;
       }
       // IBM's JRE seems to throw NPE when DBCS char is given to
       // SBCS charsetEncoder
       catch (NullPointerException e) {
         throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$
       }
     }
     // if we get all the way through loop without throwing exception,
     // then there must
     // be an error not detectable when going character by character.
     throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$
   } finally {
     reader.reset();
   }
 }
コード例 #2
0
ファイル: Text.java プロジェクト: besn0847/Jxtadoop
 /**
  * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is
  * true, then malformed input is replaced with the substitution character, which is U+FFFD.
  * Otherwise the method throws a MalformedInputException.
  *
  * <p>The encoder comes from {@code ENCODER_FACTORY} and is reused across calls, so it is
  * temporarily switched to {@code REPLACE} and always switched back to {@code REPORT} before this
  * method exits, including when encoding fails.
  *
  * @param string the text to encode; must not be null
  * @param replace whether malformed or unmappable input is substituted instead of reported
  * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
  * @throws CharacterCodingException if the input cannot be encoded and {@code replace} is false
  */
 public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException {
   CharsetEncoder encoder = ENCODER_FACTORY.get();
   if (replace) {
     encoder.onMalformedInput(CodingErrorAction.REPLACE);
     encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
   }
   try {
     return encoder.encode(CharBuffer.wrap(string.toCharArray()));
   } finally {
     // Restore the reporting actions even if encode() throws, so a failure here cannot leave
     // the reused encoder silently replacing characters for later callers.
     if (replace) {
       encoder.onMalformedInput(CodingErrorAction.REPORT);
       encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
     }
   }
 }
コード例 #3
0
 /**
  * Creates a writer that encodes characters with the given charset and sends the resulting bytes
  * to {@code out}. Malformed input and unmappable characters are replaced rather than reported.
  *
  * @param out the target stream to write converted bytes to.
  * @param cs the {@code Charset} that specifies the character encoding.
  */
 public OutputStreamWriter(OutputStream out, Charset cs) {
   super(out);
   this.out = out;
   // The configuration methods return the encoder itself, so it can be set up in one expression.
   encoder =
       cs.newEncoder()
           .onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
コード例 #4
0
 /**
  * Creates a writer that sends converted bytes to {@code out} using the encoding named by the
  * {@code file.encoding} lookup, for which {@code ISO8859_1} is supplied as the second argument.
  * Malformed input and unmappable characters are replaced rather than reported.
  *
  * @param out the non-null target stream to write converted bytes to.
  */
 public OutputStreamWriter(OutputStream out) {
   super(out);
   this.out = out;
   PriviAction<String> encodingLookup =
       new PriviAction<String>("file.encoding", "ISO8859_1"); // $NON-NLS-1$ //$NON-NLS-2$
   String defaultEncodingName = AccessController.doPrivileged(encodingLookup);
   encoder =
       Charset.forName(defaultEncodingName)
           .newEncoder()
           .onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
コード例 #5
0
  /**
   * Returns a new encoder for the named charset, configured to replace malformed input and
   * unmappable characters instead of reporting them.
   *
   * @param charsetName the name of the charset to look up via {@code Charset.forName}
   * @return a freshly created encoder for that charset
   */
  public static CharsetEncoder getCharsetEncoder(String charsetName) {
    return Charset.forName(charsetName)
        .newEncoder()
        .onMalformedInput(CodingErrorAction.REPLACE)
        .onUnmappableCharacter(CodingErrorAction.REPLACE);
  }
  /**
   * Encodes the String held in {@code value} into the bytes to be written to file.
   *
   * <p>{@code stripTrailingNull()} is invoked first. The string is then split on null separators
   * and every resulting value is encoded on its own, so that each value is written with its own
   * BOM when a UTF-16 charset is in use. The size is updated to the number of bytes produced.
   *
   * @return the data as a byte array in format to write to file
   * @throws RuntimeException wrapping the {@code CharacterCodingException} if encoding fails
   */
  public byte[] writeByteArray() {
    // Encode with the charset reported by getTextEncodingCharSet()
    String charSetName = getTextEncodingCharSet();
    byte[] data;
    try {
      stripTrailingNull();

      String stringValue = (String) value;

      // There is no UTF-16 charset that writes a little-endian BOM, so UTF-16 is mapped to an
      // explicit LE or BE variant and the BOM is written by the dedicated helpers below.
      String utf16Variant = null;
      if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) {
        utf16Variant =
            TagOptionSingleton.getInstance().isEncodeUTF16BomAsLittleEndian()
                ? TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT
                : TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT;
      }

      // Sized generously so that it is large enough for any encoding
      ByteBuffer outputBuffer = ByteBuffer.allocate((stringValue.length() + 3) * 3);

      List<String> values = splitByNullSeperator(stringValue);
      checkTrailingNull(values, stringValue);

      int valueCount = values.size();
      for (int index = 0; index < valueCount; index++) {
        String current = values.get(index);
        if (utf16Variant == null) {
          CharsetEncoder charsetEncoder =
              Charset.forName(charSetName)
                  .newEncoder()
                  .onMalformedInput(CodingErrorAction.IGNORE)
                  .onUnmappableCharacter(CodingErrorAction.IGNORE);
          outputBuffer.put(writeString(charsetEncoder, current, index, valueCount));
        } else if (utf16Variant.equals(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT)) {
          outputBuffer.put(writeStringUTF16LEBOM(current, index, valueCount));
        } else if (utf16Variant.equals(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT)) {
          outputBuffer.put(writeStringUTF16BEBOM(current, index, valueCount));
        }
      }

      // Switch the buffer to reading and copy out exactly the bytes that were written.
      outputBuffer.flip();
      data = new byte[outputBuffer.limit()];
      outputBuffer.get(data);
      setSize(data.length);
    }
    // https://bitbucket.org/ijabz/jaudiotagger/issue/1/encoding-metadata-to-utf-16-can-fail-if
    catch (CharacterCodingException codingFailure) {
      logger.severe(codingFailure.getMessage() + ":" + charSetName + ":" + value);
      throw new RuntimeException(codingFailure);
    }
    return data;
  }
コード例 #7
0
ファイル: RubyIconv.java プロジェクト: qmx/jruby
  /**
   * Sets up this converter from the two encoding arguments: a decoder for the source encoding
   * ({@code arg2}) and an encoder for the target encoding ({@code arg1}), and resets the counter.
   *
   * <p>Both arguments must respond to {@code to_str}. When {@code isIgnore} accepts the target
   * string, coding errors are ignored on both sides; otherwise they are reported.
   *
   * @param arg1 the target ("to") encoding
   * @param arg2 the source ("from") encoding
   * @param unusedBlock not used
   * @return this object
   */
  @JRubyMethod
  public IRubyObject initialize(IRubyObject arg1, IRubyObject arg2, Block unusedBlock) {
    Ruby runtime = getRuntime();
    for (IRubyObject candidate : new IRubyObject[] {arg1, arg2}) {
      if (!candidate.respondsTo("to_str")) {
        throw runtime.newTypeError("can't convert " + candidate.getMetaClass() + " into String");
      }
    }

    String to = arg1.convertToString().toString();
    String from = arg2.convertToString().toString();

    try {
      fromEncoding = Charset.forName(getCharset(from)).newDecoder();
      toEncoding = Charset.forName(getCharset(to)).newEncoder();
      count = 0;

      // One policy applies to both directions and to both kinds of coding error.
      CodingErrorAction errorPolicy =
          isIgnore(to) ? CodingErrorAction.IGNORE : CodingErrorAction.REPORT;
      fromEncoding.onUnmappableCharacter(errorPolicy);
      fromEncoding.onMalformedInput(errorPolicy);
      toEncoding.onUnmappableCharacter(errorPolicy);
      toEncoding.onMalformedInput(errorPolicy);
    } catch (IllegalCharsetNameException badName) {
      throw runtime.newInvalidEncoding("invalid encoding");
    } catch (UnsupportedCharsetException unsupported) {
      throw runtime.newInvalidEncoding("invalid encoding");
    } catch (Exception other) {
      throw runtime.newSystemCallError(other.toString());
    }

    return this;
  }
コード例 #8
0
 /**
  * Constructs a new OutputStreamWriter using {@code out} as the target stream to write converted
  * characters to and {@code enc} as the character encoding. If the encoding cannot be found, an
  * UnsupportedEncodingException error is thrown. Malformed input and unmappable characters are
  * replaced rather than reported.
  *
  * @param out the target stream to write converted bytes to.
  * @param enc the string describing the desired character encoding.
  * @throws NullPointerException if {@code enc} is {@code null}.
  * @throws UnsupportedEncodingException if the encoding specified by {@code enc} cannot be found;
  *     the underlying failure is available as the cause.
  */
 public OutputStreamWriter(OutputStream out, final String enc)
     throws UnsupportedEncodingException {
   super(out);
   if (enc == null) {
     throw new NullPointerException();
   }
   this.out = out;
   try {
     encoder = Charset.forName(enc).newEncoder();
   } catch (Exception e) {
     // UnsupportedEncodingException has no cause-taking constructor, so attach the original
     // failure explicitly instead of discarding it.
     UnsupportedEncodingException unsupported = new UnsupportedEncodingException(enc);
     unsupported.initCause(e);
     throw unsupported;
   }
   encoder.onMalformedInput(CodingErrorAction.REPLACE);
   encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
  /**
   * Encodes one value as big-endian UTF-16 with a leading byte order mark.
   *
   * <p>Every value except the last one is followed by a null terminator, so that multiple values
   * written one after another stay separated.
   *
   * @param next the value to encode
   * @param i the zero-based index of this value
   * @param noOfValues the total number of values being written
   * @return the encoded bytes, positioned at the start
   * @throws CharacterCodingException if the encoder fails
   */
  private ByteBuffer writeStringUTF16BEBOM(String next, int i, int noOfValues)
      throws CharacterCodingException {
    CharsetEncoder utf16BeEncoder =
        Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT)
            .newEncoder()
            .onMalformedInput(CodingErrorAction.IGNORE)
            .onUnmappableCharacter(CodingErrorAction.IGNORE);

    // Prefix the BOM; only non-final values carry a trailing null.
    boolean isLastValue = (i + 1) == noOfValues;
    String withBom = '\ufeff' + next;
    String text = isLastValue ? withBom : withBom + '\0';

    ByteBuffer encoded = utf16BeEncoder.encode(CharBuffer.wrap(text));
    encoded.rewind();
    return encoded;
  }
コード例 #10
0
ファイル: ReaderInputStream.java プロジェクト: ribrdb/jruby
 /**
  * Creates ReaderInputStream from a given Reader type object with a specifed encoding.
  *
  * @param reader java.io.Reader object to be read data from.
  * @param encoding an encoding of the created stream.
  */
 public ReaderInputStream(Reader reader, String encoding) {
   this.reader = reader;
   if (encoding == null) {
     if (reader instanceof InputStreamReader) {
       encoding = ((InputStreamReader) reader).getEncoding();
     } else {
       encoding = Charset.defaultCharset().name();
     }
   } else if (!Charset.isSupported(encoding)) {
     throw new IllegalArgumentException(encoding + " is not supported");
   }
   encoder = Charset.forName(encoding).newEncoder();
   encoder.onMalformedInput(CodingErrorAction.REPLACE);
   encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
   try {
     fillByteBuffer(reader);
   } catch (IOException ex) {
     throw new RuntimeException(ex);
   }
 }
  /**
   * Sets up the buffers, the UTF-8 encoder and the two worker threads of a terminal session.
   *
   * <p>The reader thread copies bytes from {@code mTermIn} into {@code mByteQueue} and posts
   * {@code NEW_INPUT} messages; the writer thread runs a {@code Looper} and copies bytes from
   * {@code mWriteQueue} to {@code mTermOut} whenever it receives {@code NEW_OUTPUT}. This
   * constructor only creates and names the threads; it does not start them.
   *
   * @param exitOnEOF when true, the reader thread posts an {@code EOF} message to {@code
   *     mMsgHandler} once its read loop ends
   */
  public TermSession(final boolean exitOnEOF) {
    // Scratch buffers used with the UTF-8 encoder: two chars in, up to four bytes out.
    mWriteCharBuffer = CharBuffer.allocate(2);
    mWriteByteBuffer = ByteBuffer.allocate(4);
    mUTF8Encoder = Charset.forName("UTF-8").newEncoder();
    // Substitute rather than fail on input that cannot be encoded.
    mUTF8Encoder.onMalformedInput(CodingErrorAction.REPLACE);
    mUTF8Encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

    mReceiveBuffer = new byte[4 * 1024];
    mByteQueue = new ByteQueue(4 * 1024);
    mReaderThread =
        new Thread() {
          private byte[] mBuffer = new byte[4096];

          @Override
          public void run() {
            try {
              while (true) {
                int read = mTermIn.read(mBuffer);
                if (read == -1) {
                  // EOF -- process exited
                  break;
                }
                int offset = 0;
                // The queue may accept fewer bytes than offered, so keep writing until the
                // whole chunk has been handed over, notifying the handler after each write.
                while (read > 0) {
                  int written = mByteQueue.write(mBuffer, offset, read);
                  offset += written;
                  read -= written;
                  mMsgHandler.sendMessage(mMsgHandler.obtainMessage(NEW_INPUT));
                }
              }
            } catch (IOException e) {
              // Swallowed: a read failure simply ends the loop, like EOF.
            } catch (InterruptedException e) {
              // Swallowed: an interrupt simply ends the loop; the interrupt flag is not restored.
            }

            // Reached after EOF and also after either exception above.
            if (exitOnEOF) mMsgHandler.sendMessage(mMsgHandler.obtainMessage(EOF));
          }
        };
    mReaderThread.setName("TermSession input reader");

    mWriteQueue = new ByteQueue(4096);
    mWriterThread =
        new Thread() {
          private byte[] mBuffer = new byte[4096];

          @Override
          public void run() {
            // The Handler below needs a Looper on this thread.
            Looper.prepare();

            mWriterHandler =
                new Handler() {
                  @Override
                  public void handleMessage(Message msg) {
                    if (msg.what == NEW_OUTPUT) {
                      writeToOutput();
                    } else if (msg.what == FINISH) {
                      // Ends Looper.loop() below, letting run() return.
                      Looper.myLooper().quit();
                    }
                  }
                };

            // Drain anything in the queue from before we started
            writeToOutput();

            Looper.loop();
          }

          /**
           * Moves at most one buffer's worth of queued bytes to the terminal output stream and
           * flushes it. Does nothing when the queue is empty.
           */
          private void writeToOutput() {
            ByteQueue writeQueue = mWriteQueue;
            byte[] buffer = mBuffer;
            OutputStream termOut = mTermOut;

            int bytesAvailable = writeQueue.getBytesAvailable();
            int bytesToWrite = Math.min(bytesAvailable, buffer.length);

            if (bytesToWrite == 0) {
              return;
            }

            try {
              writeQueue.read(buffer, 0, bytesToWrite);
              termOut.write(buffer, 0, bytesToWrite);
              termOut.flush();
            } catch (IOException e) {
              // Ignore exception
              // We don't really care if the receiver isn't listening.
              // We just make a best effort to answer the query.
              e.printStackTrace();
            } catch (InterruptedException e) {
              // Only printed; the interrupt flag is not restored.
              e.printStackTrace();
            }
          }
        };
    mWriterThread.setName("TermSession output writer");
  }
コード例 #12
0
  /**
   * Encodes the document text and writes it to the storage behind the editor input.
   *
   * <p>The storage is resolved from {@code element}; when there is none, a local file is used
   * instead. The text is encoded with the storage's charset when the storage is an {@code
   * IEncodedStorage}, otherwise with {@code GeneralUtils.DEFAULT_FILE_CHARSET_NAME}. Malformed
   * input is replaced, but a character that the charset cannot represent fails the save.
   *
   * <p>Existing workspace files get their contents replaced (with state-change notifications),
   * missing ones are created, persistent storages receive the stream directly, and local files
   * are overwritten.
   *
   * @param monitor progress monitor passed on to the storage operations
   * @param element the editor input being saved
   * @param document the document whose text is written
   * @param overwrite not consulted by this implementation
   * @throws CoreException if saving fails; any other exception is wrapped via {@code
   *     GeneralUtils.makeExceptionStatus}
   */
  @Override
  protected void doSaveDocument(
      IProgressMonitor monitor, Object element, IDocument document, boolean overwrite)
      throws CoreException {
    try {
      IStorage storage = EditorUtils.getStorageFromInput(element);
      File localFile = null;
      if (storage == null) {
        localFile = EditorUtils.getLocalFileFromInput(element);
        if (localFile == null) {
          throw new DBException("Can't obtain file from editor input");
        }
      }
      String encoding =
          (storage instanceof IEncodedStorage
              ? ((IEncodedStorage) storage).getCharset()
              : GeneralUtils.DEFAULT_FILE_CHARSET_NAME);

      Charset charset = Charset.forName(encoding);

      CharsetEncoder encoder = charset.newEncoder();
      encoder.onMalformedInput(CodingErrorAction.REPLACE);
      encoder.onUnmappableCharacter(CodingErrorAction.REPORT);

      ByteBuffer byteBuffer = encoder.encode(CharBuffer.wrap(document.get()));
      // Expose exactly the encoded bytes. A backing array may be larger than the content and
      // may start at a non-zero offset, so honor arrayOffset() and position().
      final int length = byteBuffer.remaining();
      final byte[] bytes;
      final int offset;
      if (byteBuffer.hasArray()) {
        bytes = byteBuffer.array();
        offset = byteBuffer.arrayOffset() + byteBuffer.position();
      } else {
        bytes = new byte[length];
        byteBuffer.get(bytes);
        offset = 0;
      }
      InputStream stream = new ByteArrayInputStream(bytes, offset, length);

      if (storage instanceof IFile) {
        IFile file = (IFile) storage;

        if (file.exists()) {

          // inform about the upcoming content change
          fireElementStateChanging(element);
          try {
            file.setContents(stream, true, true, monitor);
          } catch (CoreException x) {
            // inform about failure
            fireElementStateChangeFailed(element);
            throw x;
          } catch (RuntimeException x) {
            // inform about failure
            fireElementStateChangeFailed(element);
            throw x;
          }

        } else {
          try {
            monitor.beginTask("Save file '" + file.getName() + "'", 2000);
            // ContainerCreator creator = new ContainerCreator(file.getWorkspace(),
            // file.getParent().getFullPath());
            // creator.createContainer(new SubProgressMonitor(monitor, 1000));
            file.create(stream, false, monitor);
          } finally {
            monitor.done();
          }
        }
      } else if (storage instanceof IPersistentStorage) {
        monitor.beginTask("Save document", 1);
        try {
          ((IPersistentStorage) storage).setContents(monitor, stream);
        } finally {
          // Pair beginTask with done, as the file-creation branch above does.
          monitor.done();
        }
      } else if (localFile != null) {
        try (OutputStream os = new FileOutputStream(localFile)) {
          IOUtils.copyStream(stream, os);
        }
      } else {
        throw new DBException("Storage [" + storage + "] doesn't support save");
      }
    } catch (Exception e) {
      if (e instanceof CoreException) {
        throw (CoreException) e;
      } else {
        throw new CoreException(GeneralUtils.makeExceptionStatus(e));
      }
    }
  }
コード例 #13
0
 /**
  * Creates an instance that reads bytes from <code>delegate</code> that represent characters in
  * the "from" character set, and returning them to the caller as characters in the "to" character
  * set. Any characters that cannot be represented in the "to" character set will be replaced by
  * <code>replacement</code>
  */
 public TranslatingInputStream(InputStream delegate, Charset from, Charset to, char replacement) {
   this(delegate, from, to);
   _encoder.replaceWith(encodeReplacement(to, replacement));
   _encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
コード例 #14
0
  /**
   * Writes the current text to {@code outputStream}, preceded by a byte order mark where one is
   * called for, encoding it according to {@code encodingRule}.
   *
   * <p>The output stream is flushed but never closed here; closing it remains the caller's
   * responsibility.
   *
   * @param outputStream the stream that receives the BOM bytes and the encoded text
   * @param encodingRule selects the charset: {@code CONTENT_BASED} uses the memento's Java charset
   *     and fails if the memento is not valid, {@code IGNORE_CONVERSION_ERROR} uses the same
   *     charset but replaces characters that cannot be encoded, and {@code FORCE_DEFAULT} uses the
   *     memento's appropriate default
   * @param use3ByteBOMifUTF8 forces the three-byte BOM when the charset is UTF-8, even if the
   *     memento does not record one
   * @throws CoreException declared by this method; NOTE(review): no visible statement obviously
   *     raises it, presumably it comes from a helper
   * @throws IOException if reading, encoding or writing fails
   */
  private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8)
      throws CoreException, IOException {
    // Called for its side effect; fCurrentEncodingMemento is read throughout this method.
    getCurrentEncodingMemento();
    String javaEncodingName = null;
    if (encodingRule == EncodingRule.CONTENT_BASED) {
      if (fCurrentEncodingMemento.isValid()) {
        javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
      } else {
        throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento);
      }
    } else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
      javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
    } else if (encodingRule == EncodingRule.FORCE_DEFAULT) {
      javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault();
    }
    // write appropriate "header" unicode BOM bytes
    // Note: Java seems to write appropriate header for UTF-16, but not UTF-8 nor UTF-16BE. This
    // may vary by JRE version, so need to test well.
    // Note: javaEncodingName can be null in invalid cases, so we skip the whole check if that's
    // the case.
    if (javaEncodingName != null) {
      if (javaEncodingName.equals(UTF_8_CHARSET_NAME)
          && (use3ByteBOMifUTF8 || fCurrentEncodingMemento.isUTF83ByteBOMUsed())) {
        outputStream.write(UTF3BYTEBOM);
      } else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) {
        outputStream.write(UTF16LEBOM);
      } else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) {
        outputStream.write(UTF16BEBOM);
      }
    }
    // TODO add back in line delimiter handling the "right" way (updating markers, not requiring
    // string, etc. .. may need to move to document level)
    // allTextBuffer =
    // handleLineDelimiter(allTextBuffer, document);
    Reader reader = getResettableReader();
    // be sure to test large "readers" ... we'll need to make sure they all can reset to initial
    // position (StringReader, CharArrayReader, and DocumentReader should all work ok).
    reader.reset();
    // There must be cleaner logic somehow, but the idea is that javaEncodingName can be null if
    // original detected encoding is not valid (and if FORCE_DEFAULT was not specified). Hence,
    // we WANT the first Charset.forName to throw appropriate exception.
    Charset charset = null;

    // this call checks "override" properties file
    javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName);

    if (javaEncodingName == null) {
      charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName());
    } else {
      charset = Charset.forName(javaEncodingName);
    }
    CharsetEncoder charsetEncoder = charset.newEncoder();
    if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) {
      charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
      charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    } else {
      charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE);
      charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    }
    OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder);
    // TODO: this may no longer be needed (and is at
    // least wrong spot for
    // it).
    //		if (checkConversion && (!(encodingRule ==
    // EncodingRule.IGNORE_CONVERSION_ERROR))) {
    //			checkConversion(fCurrentEncodingMemento,
    // encodingRule);
    //		}
    char[] charbuf = new char[CodedIO.MAX_BUF_SIZE];
    int nRead = 0;
    try {
      while (nRead != -1) {
        // Bound the read by the buffer that was actually allocated, not by a separately named
        // constant, so the two sizes can never drift apart.
        nRead = reader.read(charbuf, 0, charbuf.length);
        if (nRead > 0) {
          outputStreamWriter.flush();
          outputStreamWriter.write(charbuf, 0, nRead);
        }
      }
    } catch (UnmappableCharacterException e) {
      // Hand over to checkConversion, which re-examines the text against the memento's charset
      // (presumably to report the failure in more detail).
      checkConversion(fCurrentEncodingMemento, encodingRule);
    } finally {
      // since we don't own the original output stream, we
      // won't close it ours.
      // the caller who passed it to us must close original one
      // when appropriate.
      // (but we do flush to be sure all up-to-date)
      outputStreamWriter.flush();
    }
  }