/** * Need to check conversion early on. There's some danger than old contents of a file are set to * empty, if an exception occurs. * * @param allText * @param encoding * @param encodingRule * @throws java.io.UnsupportedEncodingException * @throws MalformedOutputExceptionWithDetail * @deprecated - we need to find "cheaper" way to to this functionality so likely to go away in * future */ private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException { String javaEncoding = memento.getJavaCharsetName(); String detectedEncoding = memento.getDetectedCharsetName(); Charset charset = Charset.forName(javaEncoding); CharsetEncoder charsetEncoder = charset.newEncoder(); charsetEncoder.onMalformedInput(CodingErrorAction.REPORT); charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT); Reader reader = getResettableReader(); reader.reset(); int currentChar = reader.read(); int currentPos = 1; try { while (currentChar != -1) { // note: this can probably be made more // efficient later to // check buffer by buffer, instead of // character by character. try { boolean canConvert = charsetEncoder.canEncode((char) currentChar); if (!canConvert) { if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) { // if we're told to ignore the // encoding conversion // error, // notice we still want to detect // and log it. We simply // don't throw the exception, and // we do continue with // the // save. 
Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); // $NON-NLS-1$ } else { throw new MalformedOutputExceptionWithDetail( javaEncoding, detectedEncoding, currentPos); } } currentChar = reader.read(); currentPos++; } // IBM's JRE seems to throw NPE when DBCS char is given to // SBCS charsetEncoder catch (NullPointerException e) { throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$ } } // if we get all the way through loop without throwing exception, // then there must // be an error not detectable when going character by character. throw new CharConversionErrorWithDetail(javaEncoding); // $NON-NLS-1$ } finally { reader.reset(); } }
public static void main(String args[]) throws Exception { String inputFile = "samplein.txt"; String outputFile = "sampleout.txt"; RandomAccessFile inf = new RandomAccessFile(inputFile, "r"); RandomAccessFile outf = new RandomAccessFile(outputFile, "rw"); long inputLength = new File(inputFile).length(); FileChannel inc = inf.getChannel(); FileChannel outc = outf.getChannel(); MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength); Charset latin1 = Charset.forName("ISO-8859-1"); CharsetDecoder decoder = latin1.newDecoder(); CharsetEncoder encoder = latin1.newEncoder(); CharBuffer cb = decoder.decode(inputData); // Process char data here ByteBuffer outputData = encoder.encode(cb); outc.write(outputData); inf.close(); outf.close(); }
/**
 * Creates a string in a specified character set.
 *
 * <p>The charset name is upper-cased with {@link java.util.Locale#ROOT} so that the result does
 * not depend on the JVM's default locale (for example, a Turkish default locale would otherwise
 * turn {@code "iso-8859-1"} into a name containing a dotted capital I).
 *
 * @param value String constant, must not be null
 * @param charsetName Name of the character set, may be null
 * @param collation Collation, may be null
 * @throws IllegalCharsetNameException If the given charset name is illegal
 * @throws UnsupportedCharsetException If no support for the named charset is available in this
 *     instance of the Java virtual machine
 * @throws RuntimeException If the given value cannot be represented in the given charset
 */
public NlsString(String value, String charsetName, SqlCollation collation) {
  assert value != null;
  if (null != charsetName) {
    charsetName = charsetName.toUpperCase(java.util.Locale.ROOT);
    this.charsetName = charsetName;
    String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName);
    if (javaCharsetName == null) {
      throw new UnsupportedCharsetException(charsetName);
    }
    this.charset = Charset.forName(javaCharsetName);
    CharsetEncoder encoder = charset.newEncoder();
    // dry run to see if encoding hits any problems
    try {
      encoder.encode(CharBuffer.wrap(value));
    } catch (CharacterCodingException ex) {
      throw RESOURCE.charsetEncoding(value, javaCharsetName).ex();
    }
  } else {
    this.charsetName = null;
    this.charset = null;
  }
  this.collation = collation;
  this.value = value;
}
/**
 * Encodes {@code len} chars of {@code ca}, starting at {@code off}, into {@code targetArray}.
 *
 * <p>Encoders that implement {@code ArrayEncoder} are used directly on the arrays; any other
 * encoder is reset and driven through the buffer API, including the final flush.
 *
 * @param ce the encoder to use
 * @param ca source characters
 * @param off index of the first character to encode
 * @param len number of characters to encode
 * @param targetArray destination for the encoded bytes
 * @return the number of bytes written to {@code targetArray}; {@code 0} when {@code len} is 0
 * @throws Error wrapping the {@link CharacterCodingException} if encoding or flushing reports a
 *     coding error
 */
public static int encode(
    final CharsetEncoder ce,
    final char[] ca,
    final int off,
    final int len,
    final byte[] targetArray) {
  if (len == 0) {
    return 0;
  }
  if (ce instanceof ArrayEncoder) {
    return ((ArrayEncoder) ce).encode(ca, off, len, targetArray);
  }

  ce.reset();
  final ByteBuffer sink = ByteBuffer.wrap(targetArray);
  final CharBuffer source = CharBuffer.wrap(ca, off, len);
  try {
    CoderResult outcome = ce.encode(source, sink, true);
    if (outcome.isUnderflow()) {
      // Encoding completed; let the encoder emit any trailing bytes.
      outcome = ce.flush(sink);
    }
    if (!outcome.isUnderflow()) {
      outcome.throwException();
    }
  } catch (CharacterCodingException x) {
    throw new Error(x);
  }
  return sink.position();
}
/** Saves the file back to disk. */ private void saveFile() { File file = getTargetFile().getFile(); Writer writer = null; Cursor originalcursor = this.getOwner().getCursor(); this.getOwner().setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); try { CharsetEncoder encoder = getCharset().newEncoder(); if (encoder.canEncode(textArea.getText())) { writer = new OutputStreamWriter(new FileOutputStream(file), getCharset()); textArea.write(writer); messageBox.clear(); } else { findIllegalCharacter(encoder); } } catch (FileNotFoundException fnfx) { // happens also if file is RO logger.error("FileNotFoundException saving to " + file.getPath() + ": " + fnfx.getMessage()); messageBox.error(getLocalizer().localize("message.file-removed")); } catch (IOException iox) { logger.error("IOException saving file " + file.getPath() + ": " + iox.getMessage()); messageBox.error( getLocalizer() .localize("message.file-processing-error", new Object[] {iox.getMessage()})); } finally { this.getOwner().setCursor(originalcursor); if (writer != null) try { writer.close(); } catch (IOException iox) { logger.warn("IOException closing stream: " + iox.getMessage()); } } }
public void flush() throws IOException { // Log.i("PackageManager", "flush mPos=" + mPos); if (mPos > 0) { if (mOutputStream != null) { CharBuffer charBuffer = CharBuffer.wrap(mText, 0, mPos); CoderResult result = mCharset.encode(charBuffer, mBytes, true); while (true) { if (result.isError()) { throw new IOException(result.toString()); } else if (result.isOverflow()) { flushBytes(); result = mCharset.encode(charBuffer, mBytes, true); continue; } break; } flushBytes(); mOutputStream.flush(); } else { mWriter.write(mText, 0, mPos); mWriter.flush(); } mPos = 0; } }
/**
 * Converts one chunk of input bytes: the bytes are decoded with {@code myDecoder} into {@code
 * myCharBuffer} and the resulting characters are encoded with {@code myEncoder} into {@code dst}.
 *
 * <p>NOTE(review): the sizing/preservation semantics of the {@code allocate} helper are not
 * visible here; the statement order below is kept exactly as-is because of that.
 *
 * @param b source bytes
 * @param offset index of the first byte of the chunk
 * @param length number of bytes in the chunk
 * @param dst destination buffer as passed through {@code allocate}
 * @param endOfInput passed to the decoder as its end-of-input flag (the encoder is always called
 *     with {@code false})
 * @return the buffer returned by {@code allocate} for {@code dst}, holding the encoded bytes
 * @throws SVNException declared for {@code throwException}, which is invoked when decoding or
 *     encoding reports an error
 */
public ByteBuffer convertChunk(
    byte[] b, int offset, int length, ByteBuffer dst, boolean endOfInput) throws SVNException {
  // Stage the incoming bytes for reading.
  myInputByteBuffer = allocate(myInputByteBuffer, length);
  myInputByteBuffer.put(b, offset, length);
  myInputByteBuffer.flip();

  // Decode bytes -> chars.
  int charCapacity = (int) (myDecoder.maxCharsPerByte() * myInputByteBuffer.remaining());
  myCharBuffer = allocate(myCharBuffer, charCapacity);
  CoderResult decodeOutcome = myDecoder.decode(myInputByteBuffer, myCharBuffer, endOfInput);
  if (decodeOutcome.isError()) {
    throwException(decodeOutcome);
  } else if (decodeOutcome.isUnderflow()) {
    // Keep any bytes the decoder has not consumed yet.
    myInputByteBuffer.compact();
  } else {
    myInputByteBuffer.clear();
  }

  // Encode chars -> bytes.
  myCharBuffer.flip();
  int byteCapacity = (int) (myEncoder.maxBytesPerChar() * myCharBuffer.remaining());
  dst = allocate(dst, byteCapacity);
  CoderResult encodeOutcome = myEncoder.encode(myCharBuffer, dst, false);
  if (encodeOutcome.isError()) {
    throwException(encodeOutcome);
  } else if (encodeOutcome.isUnderflow()) {
    // Keep any characters the encoder has not consumed yet.
    myCharBuffer.compact();
  } else {
    myCharBuffer.clear();
  }
  return dst;
}
/**
 * Adds an authentication header for basic authentication.
 *
 * <p>The credentials are requested from the credentials manager. If the request was canceled,
 * {@code cancel} is set and no header is added. Otherwise {@code username:password} (missing
 * parts replaced by the empty string) is UTF-8 encoded, Base64 encoded and added as the {@code
 * Authorization} request property.
 *
 * @param con the connection
 * @throws OsmTransferException thrown if something went wrong. Check for nested exceptions
 */
protected void addBasicAuthorizationHeader(HttpURLConnection con) throws OsmTransferException {
  CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
  CredentialsManagerResponse response;
  try {
    synchronized (CredentialsManagerFactory.getCredentialManager()) {
      response =
          CredentialsManagerFactory.getCredentialManager()
              .getCredentials(
                  RequestorType.SERVER,
                  false /* don't know yet whether the credentials will succeed */);
    }
  } catch (CredentialsManagerException e) {
    throw new OsmTransferException(e);
  }

  if (response == null) {
    // NOTE(review): no header is added when the manager returns no response. The previous
    // code computed a ":" token here but never sent it - confirm which behavior is intended.
    return;
  }
  if (response.isCanceled()) {
    cancel = true;
    return;
  }

  String username = response.getUsername();
  if (username == null) {
    username = "";
  }
  String password = "";
  if (response.getPassword() != null) {
    password = String.valueOf(response.getPassword());
  }
  String token = username + ":" + password;
  try {
    ByteBuffer bytes = encoder.encode(CharBuffer.wrap(token));
    con.addRequestProperty("Authorization", "Basic " + Base64.encode(bytes));
  } catch (CharacterCodingException e) {
    throw new OsmTransferException(e);
  }
}
/**
 * Encodes {@code cb} with the codec's encoder into the last unit of {@code unitChain}, appending
 * a new unit each time the current unit's buffer overflows, and finishes with an encoder flush.
 *
 * <p>After every encoder call the current unit's size is updated from its buffer position. The
 * encoder is always handed back through {@code releaseEncoder}.
 *
 * @param cc supplies (and takes back) the encoder
 * @param cb characters to write
 * @param unitChain chain receiving the encoded bytes
 * @throws RuntimeException wrapping the {@link CharacterCodingException} if the encoder reports
 *     a coding error
 */
public static void write(ICharsetCodec cc, CharBuffer cb, IUnitChain unitChain) {
  final CharsetEncoder encoder = cc.getEncoder();
  try {
    IUnit unit = Util.lastUnit(unitChain);
    ByteBuffer bb = unit.getByteBufferForWrite();
    encoder.reset();

    boolean flushing = false; // false: encoding phase, true: final flush phase
    boolean finished = false;
    while (!finished) {
      CoderResult cr = flushing ? encoder.flush(bb) : encoder.encode(cb, bb, true);
      unit.size(bb.position() - unit.start());
      if (cr.isOverflow()) {
        // Current unit is full: continue the same phase in a fresh unit.
        unit = Util.appendNewUnit(unitChain);
        bb = unit.getByteBufferForWrite();
      } else {
        if (!cr.isUnderflow()) {
          cr.throwException();
        }
        if (flushing) {
          finished = true;
        } else {
          flushing = true;
        }
      }
    }
  } catch (CharacterCodingException e) {
    throw new RuntimeException(e);
  } finally {
    cc.releaseEncoder(encoder);
  }
}
/** * Determines the number of consecutive characters that are encodable using binary compaction. * * @param msg the message * @param startpos the start position within the message * @param encoding the charset used to convert the message to a byte array * @return the requested character count */ private static int determineConsecutiveBinaryCount(String msg, int startpos, Charset encoding) throws WriterException { CharsetEncoder encoder = encoding.newEncoder(); int len = msg.length(); int idx = startpos; while (idx < len) { char ch = msg.charAt(idx); int numericCount = 0; while (numericCount < 13 && isDigit(ch)) { numericCount++; // textCount++; int i = idx + numericCount; if (i >= len) { break; } ch = msg.charAt(i); } if (numericCount >= 13) { return idx - startpos; } ch = msg.charAt(idx); if (!encoder.canEncode(ch)) { throw new WriterException( "Non-encodable character detected: " + ch + " (Unicode: " + (int) ch + ')'); } idx++; } return idx - startpos; }
/**
 * Finishes a conversion: flushes the decoder into {@code myCharBuffer}, encodes the buffered
 * characters into {@code dst} with end-of-input set, flushes the encoder, and finally calls
 * {@code reset()}.
 *
 * <p>When {@code myCharBuffer} is {@code null} only {@code reset()} is performed.
 *
 * @param dst destination buffer as passed through {@code allocate}
 * @return {@code dst}, or the buffer {@code allocate} returned for it
 * @throws SVNException declared for {@code throwException}, which is invoked when a coder
 *     reports an error
 */
public ByteBuffer flush(ByteBuffer dst) throws SVNException {
  if (myCharBuffer != null) {
    CoderResult outcome;

    // Drain whatever the decoder still holds.
    do {
      outcome = myDecoder.flush(myCharBuffer);
      if (outcome.isError()) {
        throwException(outcome);
      }
    } while (!outcome.isUnderflow());

    myCharBuffer.flip();
    int byteCapacity = (int) (myEncoder.maxBytesPerChar() * myCharBuffer.remaining());
    dst = allocate(dst, byteCapacity);
    outcome = myEncoder.encode(myCharBuffer, dst, true);
    if (outcome.isError()) {
      throwException(outcome);
    }

    // Drain whatever the encoder still holds.
    do {
      outcome = myEncoder.flush(dst);
      if (outcome.isError()) {
        throwException(outcome);
      }
    } while (!outcome.isUnderflow());
  }
  reset();
  return dst;
}
// Writes a C string and expands the frame if necessary // FIXME: Property strings containing nul ('\0') characters will corrupt the frame when they are // written. // Figure out how to throw an exception here if any nul chars are encountered private static ByteBuffer writeCString(ByteBuffer frame, String string) { Byte b = propertyAbbreviations.get(string); if (b != null) { if (frame.remaining() < 2) frame = ByteBuffer.allocate(frame.capacity() << 1).put((ByteBuffer) frame.rewind()); frame.put(b); frame.put((byte) 0); } else { CharsetEncoder cStringEncoder = cStringCharset.newEncoder(); CharBuffer chars = CharBuffer.wrap(string); for (int size = frame.capacity(); ; cStringEncoder.flush(frame)) { cStringEncoder.reset(); if (cStringEncoder.encode(chars, frame, true) == CoderResult.OVERFLOW) { // debug output // System.out.println("overflow, reallocating to size " + (size << 1) + " (printing \"" + // string + "\")"); frame = ByteBuffer.allocate(size = (size << 1)).put((ByteBuffer) frame.rewind()); } else break; } cStringEncoder.flush(frame); frame.put((byte) 0); } return frame; }
/**
 * Constructs a new OutputStreamWriter using {@code out} as the target stream to write converted
 * characters to and {@code cs} as the character encoding. Malformed input and unmappable
 * characters are replaced by the encoder rather than reported.
 *
 * @param out the target stream to write converted bytes to.
 * @param cs the {@code Charset} that specifies the character encoding.
 */
public OutputStreamWriter(OutputStream out, Charset cs) {
  super(out);
  this.out = out;
  encoder =
      cs.newEncoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Transcodes a Ruby string: its bytes are decoded with {@code srcDecoder} and the characters are
 * re-encoded with {@code destEncoder}. Both coders are reset after a successful conversion.
 *
 * <p>NOTE(review): the coder results of both steps are not inspected, so malformed or unmappable
 * input is not reported from here - confirm that this is intended.
 *
 * @param context the current thread context
 * @param srcBuffer the string to convert; must be a {@code RubyString}
 * @return the symbol {@code source_buffer_empty} for an empty string, otherwise a new string
 *     holding the converted bytes
 */
@JRubyMethod
public IRubyObject convert(ThreadContext context, IRubyObject srcBuffer) {
  if (!(srcBuffer instanceof RubyString)) {
    throw context.runtime.newTypeError(srcBuffer, context.runtime.getString());
  }
  ByteList srcBL = ((RubyString) srcBuffer).getByteList();
  if (srcBL.getRealSize() == 0) {
    return context.runtime.newSymbol("source_buffer_empty");
  }
  ByteBuffer srcBB = ByteBuffer.wrap(srcBL.getUnsafeBytes(), srcBL.begin(), srcBL.getRealSize());
  try {
    // bytes -> chars
    int charCapacity = (int) (srcDecoder.maxCharsPerByte() * srcBL.getRealSize()) + 1;
    CharBuffer chars = CharBuffer.allocate(charCapacity);
    srcDecoder.decode(srcBB, chars, true);
    chars.flip();

    // chars -> bytes
    int byteCapacity = (int) (destEncoder.maxBytesPerChar() * chars.limit()) + 1;
    ByteBuffer destBB = ByteBuffer.allocate(byteCapacity);
    destEncoder.encode(chars, destBB, true);
    destBB.flip();

    byte[] destBytes = new byte[destBB.limit()];
    destBB.get(destBytes);
    srcDecoder.reset();
    destEncoder.reset();
    return context.runtime.newString(new ByteList(destBytes, destEncoding.getEncoding(), false));
  } catch (Exception e) {
    throw context.runtime.newRuntimeError(e.getLocalizedMessage());
  }
}
/** * Decode byte array as specified charset, then convert to UTF-8 by encoding as UTF8 * * @param buf byte array to decode and convert to UTF8 * @param charsetName charset to decode byte array into * @return * @throws CharacterCodingException */ public static synchronized byte[] convertByteArrayToUTF8ByteArray(byte[] buf, String charsetName) throws CharacterCodingException { Charset cset; cset = Charset.forName(charsetName); // detected character set name CharsetDecoder csetDecoder = cset.newDecoder(); Charset utf8 = Charset.forName(Jeeves.ENCODING); CharsetEncoder utf8Encoder = utf8.newEncoder(); ByteBuffer inputBuffer = ByteBuffer.wrap(buf); // decode as detected character set CharBuffer data = csetDecoder.decode(inputBuffer); // encode as UTF-8 ByteBuffer outputBuffer = utf8Encoder.encode(data); // remove any nulls from the end of the encoded data why? - this is a // bug in the encoder???? could also be that the file has characters // from more than one charset? byte[] out = outputBuffer.array(); int length = out.length; while (out[length - 1] == 0) length--; byte[] result = new byte[length]; System.arraycopy(out, 0, result, 0, length); // now return the converted bytes return result; }
/** * Find potential matches for a query * * @param query The query string * @return Search results object */ public EdictSearchResults search(String query) { CharsetEncoder encoder = this.dictionary.getCharacterHandler().getCharsetEncoder(); ByteBuffer encodedQuery = null; try { encodedQuery = encoder.encode(CharBuffer.wrap(query)); } catch (CharacterCodingException e) { // If we can't encode it we can't search for it here // TODO some sort of exception return null; } try { EdictComparator comparator = this.dictionary.comparator(); int start = 0; int end = this.dictionary.getIndexSize() - 1; int match = -1; do { int current = start + ((end - start) / 2); int character = comparator.compareLeft(encodedQuery, this.dictionary.getIndexEntry(current)); if (character > 0) { start = current + 1; } else if (character < 0) { end = current - 1; } else { match = current; } } while ((start <= end) && (match == -1)); if (match != -1) { end = this.dictionary.getIndexSize() - 1; int min = match; int max = match; while ((min > 0) && (comparator.compareLeft(encodedQuery, this.dictionary.getIndexEntry(min - 1)) == 0)) { min--; } while ((max < end) && (comparator.compareLeft(encodedQuery, this.dictionary.getIndexEntry(max + 1)) == 0)) { max++; } return new EdictSearchResults(this.dictionary, encodedQuery, min, max); } } catch (CharacterCodingException e) { // Shouldn't happen. If any entries of the dictionary were broken, the term index should omit // all terms from that entry e.printStackTrace(); } return new EdictSearchResults(this.dictionary, encodedQuery, null, null); }
byte[] encode(char[] ca, int off, int len) { int en = (int) (ce.maxBytesPerChar() * len); byte[] ba = new byte[en]; if (len == 0) return ba; ce.reset(); ByteBuffer bb = ByteBuffer.wrap(ba); CharBuffer cb = CharBuffer.wrap(ca, off, len); try { CoderResult cr = ce.encode(cb, bb, true); if (!cr.isUnderflow()) cr.throwException(); cr = ce.flush(bb); if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new Error(x); } return trim(ba, bb.position()); }
public static void saveFile(File file, String content, String charsetName) { // if (Utils.isEmpty(fileName) || Utils.isEmpty(content)) { // return; // } // logger.info("save file:" + fileName + " charset:" + charsetName); file.getParentFile().mkdirs(); Charset cs; if (null == charsetName || "".equals(charsetName)) { cs = Charset.defaultCharset(); } else { cs = Charset.forName(charsetName); } CharsetEncoder encoder = cs.newEncoder(); FileOutputStream os = null; FileChannel out = null; try { os = new FileOutputStream(file); out = os.getChannel(); out.write(encoder.encode(CharBuffer.wrap(content))); } catch (CharacterCodingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { close(out); close(os); } }
private static void writeEntry( OutputStream os, Attributes.Name name, String value, CharsetEncoder encoder, ByteBuffer bBuf) throws IOException { byte[] out = name.getBytes(); if (out.length > LINE_LENGTH_LIMIT) { throw new IOException( Messages.getString( "archive.33", name, Integer.valueOf(LINE_LENGTH_LIMIT))); // $NON-NLS-1$ } os.write(out); os.write(VALUE_SEPARATOR); encoder.reset(); bBuf.clear().limit(LINE_LENGTH_LIMIT - out.length - 2); CharBuffer cBuf = CharBuffer.wrap(value); CoderResult r; while (true) { r = encoder.encode(cBuf, bBuf, true); if (CoderResult.UNDERFLOW == r) { r = encoder.flush(bBuf); } os.write(bBuf.array(), bBuf.arrayOffset(), bBuf.position()); os.write(LINE_SEPARATOR); if (CoderResult.UNDERFLOW == r) { break; } os.write(' '); bBuf.clear().limit(LINE_LENGTH_LIMIT - 1); } }
private void updateWithCharBuf() { final int reqSize = (int) charEncoder.maxBytesPerChar() * charBuff.position(); if (byteBuff.capacity() < reqSize) { byteBuff = java.nio.ByteBuffer.allocate(2 * reqSize); } // Make ready for read charBuff.flip(); final CoderResult cr = charEncoder.encode(charBuff, byteBuff, true); try { if (cr.isError()) cr.throwException(); // Make ready for read byteBuff.flip(); final byte[] byts = byteBuff.array(); final int len = byteBuff.remaining(); final int strt = byteBuff.arrayOffset(); digest.update(byts, strt, len); } catch (final CharacterCodingException e) { throw new OXFException(e); } catch (java.nio.BufferOverflowException e) { throw new OXFException(e); } catch (java.nio.BufferUnderflowException e) { throw new OXFException(e); } finally { // Make ready for write charBuff.clear(); byteBuff.clear(); } }
/**
 * Encodes {@code len} chars of {@code ca}, starting at {@code off}, with a fresh encoder of
 * {@code cs} that replaces malformed input and unmappable characters.
 *
 * <p>When a security manager is installed and {@code getClassLoader0()} of the charset's class
 * is not {@code null}, the encoder is handed a private copy of the characters instead of the
 * caller's array.
 *
 * @param cs the charset to encode with
 * @param ca source characters
 * @param off index of the first character to encode
 * @param len number of characters to encode
 * @return the encoded bytes as returned by {@code safeTrim}; for {@code len == 0} the freshly
 *     allocated array
 * @throws Error wrapping the {@link CharacterCodingException} if the encoder reports a coding
 *     error
 */
static byte[] encode(Charset cs, char[] ca, int off, int len) {
  CharsetEncoder ce = cs.newEncoder();
  byte[] ba = new byte[scale(len, ce.maxBytesPerChar())];
  if (len == 0) {
    return ba;
  }

  boolean isTrusted = false;
  if (System.getSecurityManager() != null) {
    isTrusted = cs.getClass().getClassLoader0() == null;
    if (!isTrusted) {
      ca = Arrays.copyOfRange(ca, off, off + len);
      off = 0;
    }
  }

  ce.onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE)
      .reset();

  if (ce instanceof ArrayEncoder) {
    int blen = ((ArrayEncoder) ce).encode(ca, off, len, ba);
    return safeTrim(ba, blen, cs, isTrusted);
  }

  ByteBuffer sink = ByteBuffer.wrap(ba);
  CharBuffer source = CharBuffer.wrap(ca, off, len);
  try {
    CoderResult outcome = ce.encode(source, sink, true);
    if (outcome.isUnderflow()) {
      outcome = ce.flush(sink);
    }
    if (!outcome.isUnderflow()) {
      outcome.throwException();
    }
  } catch (CharacterCodingException x) {
    throw new Error(x);
  }
  return safeTrim(ba, sink.position(), cs, isTrusted);
}
@Override protected void onTextMessage(CharBuffer message) throws IOException { String temp = message.toString(); JSONObject json = (JSONObject) JSONSerializer.toJSON(temp); String type = json.getString("type").toLowerCase().replaceAll(" ", ""); String location = json.getString("location"); String lat = json.getString("lat"); String lng = json.getString("lng"); String radius = json.getString("radius"); String keywords = json.getString("keywords"); String result = null; try { if (type.equals("overview")) result = getOverViewData(location, lat, lng, radius, keywords); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); JSONObject jsonResult = new JSONObject(); jsonResult.put("error", "true"); result = jsonResult.toString(); } if (result == null || result.equals("")) { JSONObject jsonResult = new JSONObject(); jsonResult.put("error", "true"); result = jsonResult.toString(); } Charset charset = Charset.forName("ISO-8859-1"); CharsetDecoder decoder = charset.newDecoder(); CharsetEncoder encoder = charset.newEncoder(); CharBuffer uCharBuffer = CharBuffer.wrap(result); ByteBuffer bbuf = encoder.encode(uCharBuffer); CharBuffer cbuf = decoder.decode(bbuf); getWsOutbound().writeTextMessage(cbuf); }
/**
 * Encodes {@code cc} with {@code cs} {@code iteration} times, records the average time per run
 * (in microseconds, derived from {@code System.nanoTime() / 1000}) in {@code t.t}, and returns
 * the bytes of the last run.
 *
 * @param cc characters to encode
 * @param cs charset to encode with
 * @param testDirect {@code true} to use direct buffers, {@code false} for heap buffers
 * @param t receives the measured time
 * @return the encoded bytes
 * @throws RuntimeException if the last run did not end with {@code CoderResult.UNDERFLOW}
 */
static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
  final CharsetEncoder enc = cs.newEncoder();
  final String csn = cs.name();

  final ByteBuffer bbf;
  final CharBuffer cbf;
  if (!testDirect) {
    bbf = ByteBuffer.allocate(cc.length * 4);
    cbf = CharBuffer.wrap(cc);
  } else {
    bbf = ByteBuffer.allocateDirect(cc.length * 4);
    cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
    cbf.put(cc).flip();
  }

  CoderResult cr = null;
  long startMicros = System.nanoTime() / 1000;
  for (int run = 0; run < iteration; run++) {
    cbf.rewind();
    bbf.clear();
    enc.reset();
    cr = enc.encode(cbf, bbf, true);
  }
  long endMicros = System.nanoTime() / 1000;
  t.t = (endMicros - startMicros) / iteration;

  if (cr != CoderResult.UNDERFLOW) {
    System.out.println("ENC-----------------");
    int pos = cbf.position();
    System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff);
    throw new RuntimeException("Encoding err: " + csn);
  }

  // Copy out exactly the bytes produced by the last run.
  bbf.flip();
  byte[] bb = new byte[bbf.remaining()];
  bbf.get(bb);
  return bb;
}
/**
 * Encodes the remaining content of {@code charBuffer} with the encoder that {@code
 * getCharsetEncoder} returns for {@code charsetName}.
 *
 * @param charsetName name of the charset to encode with
 * @param charBuffer the characters to encode
 * @return a new buffer holding the encoded bytes
 * @throws Error wrapping the {@link CharacterCodingException} if encoding fails
 */
public static ByteBuffer encode(String charsetName, CharBuffer charBuffer) {
  final CharsetEncoder encoderForName = getCharsetEncoder(charsetName);
  final ByteBuffer encoded;
  try {
    encoded = encoderForName.encode(charBuffer);
  } catch (CharacterCodingException cce) {
    throw new Error(cce);
  }
  return encoded;
}
/**
 * Constructs a new OutputStreamWriter using {@code out} as the target stream to write converted
 * characters to. The encoding is taken from the {@code file.encoding} property, with {@code
 * ISO8859_1} passed as the second argument of the privileged lookup. Malformed input and
 * unmappable characters are replaced by the encoder rather than reported.
 *
 * @param out the non-null target stream to write converted bytes to.
 */
public OutputStreamWriter(OutputStream out) {
  super(out);
  this.out = out;
  String encoding =
      AccessController.doPrivileged(
          new PriviAction<String>("file.encoding", "ISO8859_1")); // $NON-NLS-1$ //$NON-NLS-2$
  encoder =
      Charset.forName(encoding)
          .newEncoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Returns the sorted set of one-character strings for every character that US-ASCII can encode,
 * converted to lower case.
 *
 * <p>Despite the name, the set is not limited to letters: digits, punctuation and control
 * characters that are encodable in US-ASCII are included as well.
 *
 * <p>Lower-casing uses {@link java.util.Locale#ROOT}, so the content of the set does not depend
 * on the JVM's default locale (under a Turkish locale {@code "I"} would otherwise become a
 * dotless i, adding a non-ASCII entry).
 *
 * @return the lower-cased US-ASCII characters in natural order
 */
private static ImmutableSortedSet<String> getAllLetters() {
  ImmutableSortedSet.Builder<String> builder = ImmutableSortedSet.naturalOrder();
  CharsetEncoder ce = Charset.forName("US-ASCII").newEncoder();
  for (char c = 0; c < Character.MAX_VALUE; c++) {
    if (ce.canEncode(c)) {
      builder.add(String.valueOf(c).toLowerCase(java.util.Locale.ROOT));
    }
  }
  return builder.build();
}
/**
 * Creates a new encoder for the named charset that replaces malformed input and unmappable
 * characters instead of reporting them.
 *
 * @param charsetName name of the charset, as accepted by {@link Charset#forName(String)}
 * @return a new, independently configured encoder
 */
public static CharsetEncoder getCharsetEncoder(String charsetName) {
  return Charset.forName(charsetName)
      .newEncoder()
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/** * Write String into byte array * * <p>It will remove a trailing null terminator if exists if the option * RemoveTrailingTerminatorOnWrite has been set. * * @return the data as a byte array in format to write to file */ public byte[] writeByteArray() { byte[] data; // Try and write to buffer using the CharSet defined by getTextEncodingCharSet() String charSetName = getTextEncodingCharSet(); try { stripTrailingNull(); // Special Handling because there is no UTF16 BOM LE charset String stringValue = (String) value; String actualCharSet = null; if (charSetName.equals(TextEncoding.CHARSET_UTF_16)) { if (TagOptionSingleton.getInstance().isEncodeUTF16BomAsLittleEndian()) { actualCharSet = TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT; } else { actualCharSet = TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT; } } // Ensure large enough for any encoding ByteBuffer outputBuffer = ByteBuffer.allocate((stringValue.length() + 3) * 3); // Ensure each string (if multiple values) is written with BOM by writing separately List<String> values = splitByNullSeperator(stringValue); checkTrailingNull(values, stringValue); // For each value for (int i = 0; i < values.size(); i++) { String next = values.get(i); if (actualCharSet != null) { if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_LE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16LEBOM(next, i, values.size())); } else if (actualCharSet.equals(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT)) { outputBuffer.put(writeStringUTF16BEBOM(next, i, values.size())); } } else { CharsetEncoder charsetEncoder = Charset.forName(charSetName).newEncoder(); charsetEncoder.onMalformedInput(CodingErrorAction.IGNORE); charsetEncoder.onUnmappableCharacter(CodingErrorAction.IGNORE); outputBuffer.put(writeString(charsetEncoder, next, i, values.size())); } } outputBuffer.flip(); data = new byte[outputBuffer.limit()]; outputBuffer.rewind(); outputBuffer.get(data, 0, outputBuffer.limit()); setSize(data.length); } // 
https://bitbucket.org/ijabz/jaudiotagger/issue/1/encoding-metadata-to-utf-16-can-fail-if catch (CharacterCodingException ce) { logger.severe(ce.getMessage() + ":" + charSetName + ":" + value); throw new RuntimeException(ce); } return data; }
/** INTERNAL: */ protected void writeValue(String value, boolean isAttribute, Writer writer) { CharacterEscapeHandler escapeHandler = null; if (marshaller != null) { escapeHandler = marshaller.getCharacterEscapeHandler(); } if (escapeHandler != null) { try { escapeHandler.escape(value.toCharArray(), 0, value.length(), isAttribute, writer); } catch (IOException e) { throw XMLMarshalException.marshalException(e); } return; } try { if ((null != encoder && encoder.maxBytesPerChar() < 4) || value.indexOf('"') > -1 || value.indexOf('&') > -1 || value.indexOf('<') > -1) { char[] chars = value.toCharArray(); for (int x = 0, charsSize = chars.length; x < charsSize; x++) { char character = chars[x]; switch (character) { case '&': { writer.write("&"); break; } case '<': { writer.write("<"); break; } case '"': { writer.write("""); break; } default: if (encoder.canEncode(character)) { writer.write(character); } else { writer.write("&#"); writer.write(String.valueOf((int) character)); writer.write(';'); } } } } else { writer.write(value); } } catch (IOException e) { throw XMLMarshalException.marshalException(e); } }
// check and compare canEncoding/Encoding static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception { System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name()); CharsetEncoder encOLD = oldCS.newEncoder(); CharsetEncoder encNew = newCS.newEncoder(); char[] cc = new char[0x10000]; int pos = 0; boolean is970 = "x-IBM970-Old".equals(oldCS.name()); for (char c = 0; c < 0xffff; c++) { boolean canOld = encOLD.canEncode(c); boolean canNew = encNew.canEncode(c); if (is970 && c == 0x2299) continue; if (canOld != canNew) { if (canNew) { System.out.printf(" NEW(only): "); printEntry(c, newCS); } else { if (is970) { byte[] bb = new String(new char[] {c}).getBytes(oldCS); if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) { // we know 970 has bogus nnnn -> a2c1 -> 2299 continue; } } System.out.printf(" OLD(only): "); printEntry(c, oldCS); } } else if (canNew) { byte[] bbNew = new String(new char[] {c}).getBytes(newCS); byte[] bbOld = new String(new char[] {c}).getBytes(oldCS); if (!Arrays.equals(bbNew, bbOld)) { System.out.printf(" c->b NEW: "); printEntry(c, newCS); System.out.printf(" c->b OLD: "); printEntry(c, oldCS); } else { String sNew = new String(bbNew, newCS); String sOld = new String(bbOld, oldCS); if (!sNew.equals(sOld)) { System.out.printf(" b2c NEW (c=%x):", c & 0xffff); printEntry(sNew.charAt(0), newCS); System.out.printf(" b2c OLD:"); printEntry(sOld.charAt(0), oldCS); } } } if (canNew & canOld) { // added only both for now cc[pos++] = c; } } return Arrays.copyOf(cc, pos); }