private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { CharsetDecoder decoder = DECODER_FACTORY.get(); if (replace) { decoder.onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } String str = decoder.decode(utf8).toString(); // set decoder back to its default value: REPORT if (replace) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } return str; }
/**
 * Decodes the contents of {@code b} as text in {@code charset}, failing fast on bad input.
 *
 * @param b buffer of encoded bytes
 * @param charset character set used for decoding
 * @return the decoded string
 * @throws CharacterCodingException if the bytes are malformed or contain unmappable characters
 */
private static String decode(final ByteBuffer b, final Charset charset)
    throws CharacterCodingException {
  final CharsetDecoder strictDecoder =
      charset
          .newDecoder()
          .onMalformedInput(CodingErrorAction.REPORT)
          .onUnmappableCharacter(CodingErrorAction.REPORT);
  return strictDecoder.decode(b).toString();
}
/**
 * Renders the message payload for trace logging. If every byte looks like printable text (no
 * control bytes other than TAB/LF/CR), up to 24 decoded characters are shown quoted; otherwise
 * the payload is rendered as hex via {@code Utils.toHexText}.
 */
public String getPayloadTracingString() {
  if (null == payload || 0 == payload.length) return "no payload";
  boolean text = true;
  for (byte b : payload) {
    // Bytes below 0x20 — which, since byte is signed, also includes negative bytes
    // (i.e. >= 0x80 unsigned, such as UTF-8 continuation bytes) — are treated as
    // non-text, except for the common whitespace controls below.
    if (' ' > b) {
      switch (b) {
        case '\t':
        case '\n':
        case '\r':
          continue; // allowed whitespace control; continues the for-loop, keep scanning
      }
      text = false;
      break;
    }
  }
  if (text) {
    // Strict decoding: any malformed/unmappable byte makes the result an error and we
    // fall through to the hex dump below.
    CharsetDecoder decoder = CoAP.UTF8_CHARSET.newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    ByteBuffer in = ByteBuffer.wrap(payload);
    CharBuffer out = CharBuffer.allocate(24); // preview at most 24 characters
    CoderResult result = decoder.decode(in, out, true);
    decoder.flush(out);
    out.flip();
    if (CoderResult.OVERFLOW == result) {
      // Payload was longer than the 24-char preview; show the prefix plus total size.
      return "\"" + out + "\".. " + payload.length + " bytes";
    } else if (!result.isError()) {
      return "\"" + out + "\"";
    }
  }
  return Utils.toHexText(payload, 256);
}
/**
 * Asserts that the revision node file for {@code revision} records a replacement of the given
 * node kind, i.e. contains a "replace-&lt;kind&gt;" line.
 */
private void assertValidReplaceKind(File repositoryRoot, long revision, SVNNodeKind kind)
    throws SVNException {
  final CharsetDecoder lenientDecoder = Charset.forName("UTF-8").newDecoder();
  lenientDecoder.onMalformedInput(CodingErrorAction.IGNORE);
  lenientDecoder.onUnmappableCharacter(CodingErrorAction.IGNORE);
  final File revisionNodeFile = new File(repositoryRoot, "db/revs/0/" + revision);
  final InputStream input = SVNFileUtil.openFileForReading(revisionNodeFile);
  final StringBuffer lineBuffer = new StringBuffer();
  final String replaceString = "replace-" + kind;
  boolean found = false;
  try {
    // Scan line by line until the marker shows up or the file ends.
    String line = SVNFileUtil.readLineFromStream(input, lineBuffer, lenientDecoder);
    while (line != null && !found) {
      lineBuffer.setLength(0);
      if (line.indexOf(replaceString) >= 0) {
        found = true;
      } else {
        line = SVNFileUtil.readLineFromStream(input, lineBuffer, lenientDecoder);
      }
    }
  } catch (IOException e) {
    SVNErrorManager.error(SVNErrorMessage.create(SVNErrorCode.UNKNOWN, e), e, SVNLogType.CLIENT);
  } finally {
    SVNFileUtil.closeFile(input);
  }
  Assert.assertTrue(
      "Could not find 'replace-" + kind + "' string in revision node file", found);
}
/**
 * Create a new WriterOutputStream which writes to the given writer.
 *
 * <p>Malformed input and unmappable characters are silently replaced rather than reported, so
 * writing arbitrary bytes never raises a decoding error.
 *
 * @param writer destination for the decoded characters
 */
public WriterOutputStream(Writer writer) {
  this.writer = writer;
  decoder.reset();
  decoder.onMalformedInput(CodingErrorAction.REPLACE);
  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  // Freshly allocated buffers already start with position == 0 and limit == capacity,
  // so the explicit clear() calls the original made were redundant and are dropped.
  inBuffer = ByteBuffer.allocate(4096);
  outBuffer = CharBuffer.allocate(4096);
}
/**
 * Ruby-level {@code initialize(to, from)}: prepares the converter's decoder/encoder pair.
 *
 * <p>Both arguments must respond to {@code to_str}. When {@code isIgnore(to)} holds
 * (presumably an "//IGNORE"-style option in the encoding name — confirm against
 * getCharset/isIgnore), untranslatable input is silently dropped on both sides; otherwise any
 * malformed or unmappable character is reported as an error.
 */
@JRubyMethod
public IRubyObject initialize(IRubyObject arg1, IRubyObject arg2, Block unusedBlock) {
  Ruby runtime = getRuntime();
  if (!arg1.respondsTo("to_str")) {
    throw runtime.newTypeError("can't convert " + arg1.getMetaClass() + " into String");
  }
  if (!arg2.respondsTo("to_str")) {
    throw runtime.newTypeError("can't convert " + arg2.getMetaClass() + " into String");
  }
  String to = arg1.convertToString().toString();
  String from = arg2.convertToString().toString();
  try {
    fromEncoding = Charset.forName(getCharset(from)).newDecoder();
    toEncoding = Charset.forName(getCharset(to)).newEncoder();
    count = 0;
    if (isIgnore(to)) {
      // Drop bad input silently on both the decode and the encode side.
      fromEncoding.onUnmappableCharacter(CodingErrorAction.IGNORE);
      fromEncoding.onMalformedInput(CodingErrorAction.IGNORE);
      toEncoding.onUnmappableCharacter(CodingErrorAction.IGNORE);
      toEncoding.onMalformedInput(CodingErrorAction.IGNORE);
    } else {
      // Strict mode: surface conversion problems to the caller.
      fromEncoding.onUnmappableCharacter(CodingErrorAction.REPORT);
      fromEncoding.onMalformedInput(CodingErrorAction.REPORT);
      toEncoding.onUnmappableCharacter(CodingErrorAction.REPORT);
      toEncoding.onMalformedInput(CodingErrorAction.REPORT);
    }
  } catch (IllegalCharsetNameException e) {
    throw runtime.newInvalidEncoding("invalid encoding");
  } catch (UnsupportedCharsetException e) {
    throw runtime.newInvalidEncoding("invalid encoding");
  } catch (Exception e) {
    throw runtime.newSystemCallError(e.toString());
  }
  return this;
}
/**
 * Decodes {@code len} bytes of {@code ba} starting at {@code off} using charset {@code cs},
 * substituting for malformed/unmappable input, and returns the (safely trimmed) char array.
 */
static char[] decode(Charset cs, byte[] ba, int off, int len) {
  // (1) We never cache the "external" cs; the only benefit of creating an additional
  // StringDe/Encoder object to wrap it would be sharing the de/encode() method. These
  // SD/E objects are short-lived, so the young-gen GC should handle them well, but the
  // best approach is still not to allocate them unless really necessary.
  // (2) The defensive copy of the input byte/char[] has a big performance impact, as does
  // the outgoing result byte/char[]. Need to do the optimization check of
  // (sm == null && classLoader0 == null) for both.
  // (3) getClass().getClassLoader0() is expensive.
  // (4) There might be a timing gap in the isTrusted setting. getClassLoader0() is only
  // checked (and isTrusted then set) when SM == null. It is possible that SM == null now
  // but SM is NOT null later when safeTrim() is invoked... the "safe" way would be a
  // redundant check (... && (isTrusted || SM == null || getClassLoader0())) in trim, but
  // it can then be argued that the SM was null when the operation started...
  CharsetDecoder cd = cs.newDecoder();
  // Worst-case output size for this charset; trimmed to the actual length at the end.
  int en = scale(len, cd.maxCharsPerByte());
  char[] ca = new char[en];
  if (len == 0) return ca;
  boolean isTrusted = false;
  if (System.getSecurityManager() != null) {
    // Untrusted charset implementations get a defensive copy of the caller's bytes.
    if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
      ba = Arrays.copyOfRange(ba, off, off + len);
      off = 0;
    }
  }
  cd.onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE)
      .reset();
  if (cd instanceof ArrayDecoder) {
    // Fast path: array-based decoding without ByteBuffer/CharBuffer wrappers.
    int clen = ((ArrayDecoder) cd).decode(ba, off, len, ca);
    return safeTrim(ca, clen, cs, isTrusted);
  } else {
    ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
    CharBuffer cb = CharBuffer.wrap(ca);
    try {
      CoderResult cr = cd.decode(bb, cb, true);
      if (!cr.isUnderflow()) cr.throwException();
      cr = cd.flush(cb);
      if (!cr.isUnderflow()) cr.throwException();
    } catch (CharacterCodingException x) {
      // Substitution is always enabled,
      // so this shouldn't happen
      throw new Error(x);
    }
    return safeTrim(ca, cb.position(), cs, isTrusted);
  }
}
/**
 * Switches the relay to a new character set.
 *
 * <p>"CP437" is mapped to the bundled IBM437 implementation; everything else is resolved via
 * {@link Charset#forName(String)}. If the requested charset equals the current one, the
 * existing decoder is kept.
 *
 * @param encoding name of the character set to decode incoming bytes with
 */
public void setCharset(String encoding) {
  Log.d("ConnectBot.Relay", "changing charset to " + encoding);
  Charset charset;
  if (encoding.equals("CP437"))
    charset = new IBM437("IBM437", new String[] {"IBM437", "CP437"});
  else charset = Charset.forName(encoding);
  // Compare with equals(): the original used ==, which never matches the freshly
  // constructed IBM437 instance above, so the decoder was needlessly rebuilt on every
  // call. (The original's `charset == null` check was dead code: neither branch above
  // can produce null — Charset.forName throws instead.)
  if (charset.equals(currentCharset)) return;
  CharsetDecoder newCd = charset.newDecoder();
  newCd.onUnmappableCharacter(CodingErrorAction.REPLACE);
  newCd.onMalformedInput(CodingErrorAction.REPLACE);
  currentCharset = charset;
  synchronized (this) {
    decoder = newCd;
  }
}
/**
 * @return the {@link CharsetDecoder} that should be used when converting content from binary
 *     to character
 */
private CharsetDecoder getDecoder() {
  // Lazily initialized; once set, the same decoder instance is reused.
  if (decoder != null) {
    return decoder;
  }
  CharsetDecoder cached = decoders.get(encoding);
  if (cached != null) {
    // Reuse a previously configured decoder after resetting its internal state.
    cached.reset();
    decoder = cached;
  } else {
    decoder =
        Charsets.lookupCharset(encoding)
            .newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoders.put(encoding, decoder);
  }
  return decoder;
}
/** * Convert text in a given character set to a Unicode string. Any invalid characters are replaced * with U+FFFD. Returns null if the character set is not recognized. * * @param text ByteBuffer containing the character array to convert. * @param charsetName Character set it's in encoded in. * @return: Unicode string on success, null on failure. */ @CalledByNative private static String convertToUnicodeWithSubstitutions(ByteBuffer text, String charsetName) { try { Charset charset = Charset.forName(charsetName); // TODO(mmenke): Investigate if Charset.decode() can be used // instead. The question is whether it uses the proper replace // character. JDK CharsetDecoder docs say U+FFFD is the default, // but Charset.decode() docs say it uses the "charset's default // replacement byte array". CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPLACE); decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); decoder.replaceWith("\uFFFD"); return decoder.decode(text).toString(); } catch (Exception e) { return null; } }
/** * Decodes a given URL-encoded path using a given character encoding. * * @param path the URL-encoded path, can be <code>null</code>; * @param encoding the character encoding to use, cannot be <code>null</code>. * @return the decoded path, can be <code>null</code> only if the given path was <code>null</code> * . */ public static String decodePath(String path, String encoding) { // Special cases... if (path == null) { return null; } CharsetDecoder decoder = Charset.forName(encoding).newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); int len = path.length(); ByteBuffer buf = ByteBuffer.allocate(len); StringBuilder sb = new StringBuilder(); for (int i = 0; i < len; i++) { char ch = path.charAt(i); if (ch == '%' && (i + 2 < len)) { // URL-encoded char... buf.put((byte) ((16 * hexVal(path, ++i)) + hexVal(path, ++i))); } else { if (buf.position() > 0) { // flush encoded chars first... sb.append(decode(buf, decoder)); buf.clear(); } sb.append(ch); } } // flush trailing encoded characters... if (buf.position() > 0) { sb.append(decode(buf, decoder)); buf.clear(); } return sb.toString(); }
public void processData() throws CannotReadException { CharsetDecoder decoder = Charset.forName("ISO-8859-1").newDecoder(); try { majorBrand = decoder.decode((ByteBuffer) dataBuffer.slice().limit(MAJOR_BRAND_LENGTH)).toString(); } catch (CharacterCodingException cee) { // Ignore } dataBuffer.position(dataBuffer.position() + MAJOR_BRAND_LENGTH); majorBrandVersion = Utils.getIntBE( dataBuffer, dataBuffer.position(), (dataBuffer.position() + MAJOR_BRAND_VERSION_LENGTH - 1)); dataBuffer.position(dataBuffer.position() + MAJOR_BRAND_VERSION_LENGTH); while ((dataBuffer.position() < dataBuffer.limit()) && (dataBuffer.limit() - dataBuffer.position() >= COMPATIBLE_BRAND_LENGTH)) { decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onMalformedInput(CodingErrorAction.REPORT); try { String brand = decoder .decode((ByteBuffer) dataBuffer.slice().limit(COMPATIBLE_BRAND_LENGTH)) .toString(); // Sometimes just extra groups of four nulls if (!brand.equals("\u0000\u0000\u0000\u0000")) { compatibleBrands.add(brand); } } catch (CharacterCodingException cee) { // Ignore } dataBuffer.position(dataBuffer.position() + COMPATIBLE_BRAND_LENGTH); } }
/** Configures the shared decoder to substitute for any bad input instead of throwing. */
public UTF8Coder() {
  decoder
      .onMalformedInput(CodingErrorAction.REPLACE)
      .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Creates a stream that decodes incoming bytes with the supplied charset and forwards the
 * characters to {@code out}. Bad input is replaced, never reported.
 */
public WriterOutputStream(Writer out, Charset charset) {
  this.writer = out;
  decoder =
      charset
          .newDecoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
/**
 * Wraps a decoder for the given charset. Unmappable characters are always replaced; malformed
 * input is additionally replaced only for US-ASCII, matching the old ByteToCharASCII
 * converter's behavior.
 */
ByteToCharConverter(Charset charset, String encoding) {
  super(encoding);
  decoder = charset.newDecoder();
  decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  // for compatibility to old ByteToCharASCII converter:
  if ("US-ASCII".equals(charset.name())) {
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
  }
}
/**
 * Creates a stream that decodes incoming bytes with the default charset and forwards the
 * characters to {@code out}. Bad input is replaced, never reported.
 */
public WriterOutputStream(Writer out) {
  this.writer = out;
  decoder =
      DEFAULT_CHARSET
          .newDecoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE);
}
public static String[] splitNotRegexWithEncoding( byte[] bline, String encoding, String separatorChars) throws UnsupportedEncodingException { if (bline == null) { return null; } ByteBuffer line = ByteBuffer.wrap(bline); byte[] sep = null; CharsetDecoder decoder = null; if (encoding != null) { sep = separatorChars.getBytes(encoding); decoder = Charset.forName(encoding).newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); decoder.onUnmappableCharacter(CodingErrorAction.REPORT); } else { sep = separatorChars.getBytes(); } if (sep.length == 0) { String[] result = new String[1]; result[0] = new String(bline, encoding); return result; } CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); // $NON-NLS-1$ utf8Decoder.onMalformedInput(CodingErrorAction.REPORT); utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT); ArrayList<String> substrings = new ArrayList<String>(); int lineLength = line.limit(); int sepCursor = 0; int fieldCursor = 0; byte[] fieldBytes = new byte[lineLength]; while (line.position() < line.limit()) { if (sepCursor < sep.length) { byte currentByte = line.get(); if (currentByte == sep[sepCursor]) { sepCursor++; } else { sepCursor = 0; fieldBytes[fieldCursor++] = currentByte; } } else { // we found a new field if (fieldCursor > 0) { substrings.add( newStringFromSplit(decoder, utf8Decoder, encoding, fieldBytes, fieldCursor)); fieldCursor = 0; } else { // empty field substrings.add(""); // $NON-NLS-1$ } sepCursor = 0; } } if (fieldCursor > 0) { substrings.add(newStringFromSplit(decoder, utf8Decoder, encoding, fieldBytes, fieldCursor)); } if (sepCursor == sep.length) { substrings.add(""); // $NON-NLS-1$ } int resultSize = substrings.size(); if (resultSize == 0) { // no delimiter found so we have only one column String[] result = new String[1]; result[0] = new String(bline, encoding); return result; } String[] result = new String[resultSize]; substrings.toArray(result); return result; }