Beispiel #1
0
 /**
  * Decodes the remaining bytes of {@code utf8} into a string using the decoder supplied by
  * {@code DECODER_FACTORY}.
  *
  * <p>When {@code replace} is set, the decoder is temporarily switched to
  * {@link CodingErrorAction#REPLACE} for malformed and unmappable input. It is switched back to
  * {@link CodingErrorAction#REPORT} in a {@code finally} block, so the decoder is restored even
  * if decoding fails with an exception.
  *
  * @param utf8 buffer holding the bytes to decode
  * @param replace {@code true} to substitute undecodable input instead of reporting it
  * @return the decoded string
  * @throws CharacterCodingException if the decoder reports malformed or unmappable input
  */
 private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException {
   // NOTE(review): DECODER_FACTORY presumably hands out a reused (e.g. per-thread) decoder, which
   // is why its error actions must be restored afterwards - confirm against its declaration.
   CharsetDecoder decoder = DECODER_FACTORY.get();
   if (replace) {
     decoder.onMalformedInput(CodingErrorAction.REPLACE);
     decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
   }
   try {
     return decoder.decode(utf8).toString();
   } finally {
     // set decoder back to its default value: REPORT
     if (replace) {
       decoder.onMalformedInput(CodingErrorAction.REPORT);
       decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
     }
   }
 }
 /**
  * Strictly decodes the remaining bytes of {@code b} with a fresh decoder of {@code charset}.
  *
  * @param b buffer holding the bytes to decode
  * @param charset charset used to interpret the bytes
  * @return the decoded string
  * @throws CharacterCodingException if the input is malformed or contains unmappable sequences
  */
 private static String decode(final ByteBuffer b, final Charset charset)
     throws CharacterCodingException {
   return charset
       .newDecoder()
       .onMalformedInput(CodingErrorAction.REPORT)
       .onUnmappableCharacter(CodingErrorAction.REPORT)
       .decode(b)
       .toString();
 }
Beispiel #3
0
 /**
  * Renders the payload as a short string for trace output.
  *
  * <p>An absent or empty payload yields {@code "no payload"}. If every byte is at least a space
  * character, or is a tab, line feed or carriage return, the payload is decoded as text using
  * {@code CoAP.UTF8_CHARSET} into a 24-character preview; a longer text is truncated and suffixed
  * with the payload size. In every other case (including a decoding error) the hex rendering of
  * {@code Utils.toHexText(payload, 256)} is returned.
  *
  * <p>Java bytes are signed, so bytes with the high bit set compare below a space and therefore
  * select the hex rendering.
  *
  * @return the text preview, or the hex rendering when the payload is not considered text
  */
 public String getPayloadTracingString() {
   if (null == payload || 0 == payload.length) {
     return "no payload";
   }

   boolean printable = true;
   for (byte value : payload) {
     boolean allowedControl = value == '\t' || value == '\n' || value == '\r';
     if (value < ' ' && !allowedControl) {
       printable = false;
       break;
     }
   }
   if (!printable) {
     return Utils.toHexText(payload, 256);
   }

   CharsetDecoder decoder = CoAP.UTF8_CHARSET.newDecoder();
   decoder.onMalformedInput(CodingErrorAction.REPORT);
   decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

   // Only the first 24 characters are kept; an OVERFLOW result means the text was longer.
   CharBuffer preview = CharBuffer.allocate(24);
   CoderResult outcome = decoder.decode(ByteBuffer.wrap(payload), preview, true);
   decoder.flush(preview);
   preview.flip();

   if (CoderResult.OVERFLOW == outcome) {
     return "\"" + preview + "\".. " + payload.length + " bytes";
   }
   if (!outcome.isError()) {
     return "\"" + preview + "\"";
   }
   return Utils.toHexText(payload, 256);
 }
  /**
   * Asserts that the revision node file {@code db/revs/0/<revision>} under the repository root
   * contains a line with the text {@code replace-<kind>}.
   *
   * <p>Lines are read with a UTF-8 decoder that ignores malformed and unmappable input. Reading
   * stops at the first matching line.
   *
   * @param repositoryRoot root directory of the repository on disk
   * @param revision revision whose node file is inspected
   * @param kind node kind expected after the {@code replace-} prefix
   * @throws SVNException if the file cannot be opened, or an I/O error is reported while reading
   */
  private void assertValidReplaceKind(File repositoryRoot, long revision, SVNNodeKind kind)
      throws SVNException {
    final CharsetDecoder lenientUtf8 =
        Charset.forName("UTF-8")
            .newDecoder()
            .onMalformedInput(CodingErrorAction.IGNORE)
            .onUnmappableCharacter(CodingErrorAction.IGNORE);

    final File revisionNodeFile = new File(repositoryRoot, "db/revs/0/" + revision);
    final InputStream in = SVNFileUtil.openFileForReading(revisionNodeFile);
    final StringBuffer lineBuffer = new StringBuffer();
    final String needle = "replace-" + kind;
    boolean replaceLineFound = false;
    try {
      String line;
      while (!replaceLineFound
          && (line = SVNFileUtil.readLineFromStream(in, lineBuffer, lenientUtf8)) != null) {
        // The buffer is shared between reads, so empty it before the next line.
        lineBuffer.setLength(0);
        replaceLineFound = line.indexOf(needle) >= 0;
      }
    } catch (IOException e) {
      SVNErrorManager.error(SVNErrorMessage.create(SVNErrorCode.UNKNOWN, e), e, SVNLogType.CLIENT);
    } finally {
      SVNFileUtil.closeFile(in);
    }
    Assert.assertTrue(
        "Could not find 'replace-" + kind + "' string in revision node file", replaceLineFound);
  }
 /** Create a new WriterOutputStream which writes to the given writer. */
 public WriterOutputStream(Writer writer) {
   this.writer = writer;
   decoder.reset();
   decoder.onMalformedInput(CodingErrorAction.REPLACE);
   decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
   inBuffer = ByteBuffer.allocate(4096);
   inBuffer.clear();
   outBuffer = CharBuffer.allocate(4096);
   outBuffer.clear();
 }
Beispiel #6
0
  /**
   * Initializes the converter from a target ({@code arg1}) and a source ({@code arg2}) encoding
   * name.
   *
   * <p>Both arguments must respond to {@code to_str}; otherwise a type error is raised. The
   * decoder and encoder are configured to ignore bad input when {@code isIgnore(to)} holds, and
   * to report it otherwise.
   *
   * @param arg1 name of the encoding to convert to
   * @param arg2 name of the encoding to convert from
   * @param unusedBlock not used
   * @return this object
   */
  @JRubyMethod
  public IRubyObject initialize(IRubyObject arg1, IRubyObject arg2, Block unusedBlock) {
    Ruby runtime = getRuntime();
    for (IRubyObject arg : new IRubyObject[] {arg1, arg2}) {
      if (!arg.respondsTo("to_str")) {
        throw runtime.newTypeError("can't convert " + arg.getMetaClass() + " into String");
      }
    }

    String to = arg1.convertToString().toString();
    String from = arg2.convertToString().toString();

    try {
      fromEncoding = Charset.forName(getCharset(from)).newDecoder();
      toEncoding = Charset.forName(getCharset(to)).newEncoder();
      count = 0;

      // One action applies to both directions and both kinds of coding error.
      CodingErrorAction action =
          isIgnore(to) ? CodingErrorAction.IGNORE : CodingErrorAction.REPORT;
      fromEncoding.onUnmappableCharacter(action);
      fromEncoding.onMalformedInput(action);
      toEncoding.onUnmappableCharacter(action);
      toEncoding.onMalformedInput(action);
    } catch (IllegalCharsetNameException e) {
      throw runtime.newInvalidEncoding("invalid encoding");
    } catch (UnsupportedCharsetException e) {
      throw runtime.newInvalidEncoding("invalid encoding");
    } catch (Exception e) {
      throw runtime.newSystemCallError(e.toString());
    }

    return this;
  }
 /**
  * Decodes {@code len} bytes of {@code ba}, starting at {@code off}, with a new decoder of
  * {@code cs}. Malformed and unmappable input is replaced rather than reported.
  *
  * <p>If a security manager is installed and the charset's class has a non-null class loader,
  * the input range is copied before it is handed to the decoder.
  *
  * @param cs charset used for decoding
  * @param ba source bytes
  * @param off index of the first byte to decode
  * @param len number of bytes to decode
  * @return the result of {@code safeTrim} applied to the decoded characters (presumably an array
  *     trimmed to the decoded length - confirm against {@code safeTrim}); when {@code len} is 0
  *     the freshly allocated array is returned without decoding
  */
 static char[] decode(Charset cs, byte[] ba, int off, int len) {
   // (1)We never cache the "external" cs, the only benefit of creating
   // an additional StringDe/Encoder object to wrap it is to share the
   // de/encode() method. These SD/E objects are short-lived, the young-gen
   // gc should be able to take care of them well. But the best approach
   // is still not to generate them if not really necessary.
   // (2)The defensive copy of the input byte/char[] has a big performance
   // impact, as well as the outgoing result byte/char[]. Need to do the
   // optimization check of (sm==null && classLoader0==null) for both.
   // (3)getClass().getClassLoader0() is expensive
   // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
   // is only checked (and then isTrusted gets set) when (SM==null). It is
   // possible that the SM==null for now but then SM is NOT null later
   // when safeTrim() is invoked...the "safe" way to do is to redundantly
   // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
   // but it then can be argued that the SM is null when the operation
   // is started...
   CharsetDecoder cd = cs.newDecoder();
   // Size the output from the decoder's maximum chars-per-byte ratio.
   int en = scale(len, cd.maxCharsPerByte());
   char[] ca = new char[en];
   if (len == 0) return ca;
   boolean isTrusted = false;
   if (System.getSecurityManager() != null) {
     // A charset class with a null class loader is treated as trusted; otherwise work on a
     // private copy of the input range.
     if (!(isTrusted = (cs.getClass().getClassLoader0() == null))) {
       ba = Arrays.copyOfRange(ba, off, off + len);
       off = 0;
     }
   }
   cd.onMalformedInput(CodingErrorAction.REPLACE)
       .onUnmappableCharacter(CodingErrorAction.REPLACE)
       .reset();
   if (cd instanceof ArrayDecoder) {
     // Array-based path: decode straight from the byte array into the char array.
     int clen = ((ArrayDecoder) cd).decode(ba, off, len, ca);
     return safeTrim(ca, clen, cs, isTrusted);
   } else {
     // Buffer-based path: wrap both arrays and run decode followed by flush.
     ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
     CharBuffer cb = CharBuffer.wrap(ca);
     try {
       CoderResult cr = cd.decode(bb, cb, true);
       if (!cr.isUnderflow()) cr.throwException();
       cr = cd.flush(cb);
       if (!cr.isUnderflow()) cr.throwException();
     } catch (CharacterCodingException x) {
       // Substitution is always enabled,
       // so this shouldn't happen
       throw new Error(x);
     }
     return safeTrim(ca, cb.position(), cs, isTrusted);
   }
 }
Beispiel #8
0
  /**
   * Switches the decoder to the charset named by {@code encoding}.
   *
   * <p>The name {@code "CP437"} is served by a locally constructed {@code IBM437} charset; every
   * other name is resolved through {@link Charset#forName(String)}. If the resolved charset
   * equals the current one, nothing changes. Otherwise a new decoder that replaces malformed and
   * unmappable input is installed.
   *
   * @param encoding name of the charset to switch to
   */
  public void setCharset(String encoding) {
    Log.d("ConnectBot.Relay", "changing charset to " + encoding);
    Charset charset;
    if (encoding.equals("CP437")) {
      charset = new IBM437("IBM437", new String[] {"IBM437", "CP437"});
    } else {
      charset = Charset.forName(encoding);
    }

    // Compare with equals(): a fresh IBM437 instance is never identical to the stored one, so an
    // identity check would replace the decoder (and drop its state) on every CP437 request.
    // Neither branch above can produce null, so no null check is needed.
    if (charset.equals(currentCharset)) {
      return;
    }

    CharsetDecoder newCd = charset.newDecoder();
    newCd.onUnmappableCharacter(CodingErrorAction.REPLACE);
    newCd.onMalformedInput(CodingErrorAction.REPLACE);

    currentCharset = charset;
    synchronized (this) {
      decoder = newCd;
    }
  }
  /**
   * Returns the decoder used to convert binary content to characters, creating it on first use.
   *
   * <p>A decoder already registered in {@code decoders} for the current encoding is reset and
   * reused. Otherwise a new one is created that replaces malformed and unmappable input, and it
   * is registered for later calls.
   *
   * @return the {@link CharsetDecoder} that should be used when converting content from binary to
   *     character
   */
  private CharsetDecoder getDecoder() {
    if (decoder != null) {
      return decoder;
    }

    decoder = decoders.get(encoding);
    if (decoder != null) {
      // Reused decoder: clear any state left over from a previous decoding operation.
      decoder.reset();
      return decoder;
    }

    decoder = Charsets.lookupCharset(encoding).newDecoder();
    decoder.onMalformedInput(CodingErrorAction.REPLACE);
    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
    decoders.put(encoding, decoder);
    return decoder;
  }
Beispiel #10
0
  /**
   * Converts text in a given character set to a Unicode string. Any invalid characters are
   * replaced with U+FFFD.
   *
   * @param text ByteBuffer containing the bytes to convert.
   * @param charsetName Name of the character set the bytes are encoded in.
   * @return Unicode string on success, null on any failure (including an unrecognized character
   *     set).
   */
  @CalledByNative
  private static String convertToUnicodeWithSubstitutions(ByteBuffer text, String charsetName) {
    try {
      // TODO(mmenke):  Investigate if Charset.decode() can be used
      // instead.  The question is whether it uses the proper replace
      // character.  JDK CharsetDecoder docs say U+FFFD is the default,
      // but Charset.decode() docs say it uses the "charset's default
      // replacement byte array".
      return Charset.forName(charsetName)
          .newDecoder()
          .onMalformedInput(CodingErrorAction.REPLACE)
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .replaceWith("\uFFFD")
          .decode(text)
          .toString();
    } catch (Exception e) {
      return null;
    }
  }
Beispiel #11
0
  /**
   * Decodes a given URL-encoded path using a given character encoding.
   *
   * <p>Runs of {@code %XX} escapes are collected as bytes and decoded together, so multi-byte
   * sequences spanning several escapes are handled as one unit. A {@code '%'} that is not
   * followed by at least two more characters is copied through unchanged.
   *
   * @param path the URL-encoded path, can be <code>null</code>;
   * @param encoding the character encoding to use, cannot be <code>null</code>.
   * @return the decoded path, can be <code>null</code> only if the given path was
   *     <code>null</code>.
   */
  public static String decodePath(String path, String encoding) {
    if (path == null) {
      return null;
    }

    CharsetDecoder strictDecoder = Charset.forName(encoding).newDecoder();
    strictDecoder.onMalformedInput(CodingErrorAction.REPORT);
    strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    final int length = path.length();
    // Each escape consumes three characters per byte, so "length" bytes is always enough.
    ByteBuffer pending = ByteBuffer.allocate(length);
    StringBuilder result = new StringBuilder();

    int index = 0;
    while (index < length) {
      char ch = path.charAt(index);
      boolean isEscape = ch == '%' && index + 2 < length;
      if (isEscape) {
        // URL-encoded byte: two hex digits follow the percent sign.
        pending.put((byte) ((16 * hexVal(path, index + 1)) + hexVal(path, index + 2)));
        index += 3;
        continue;
      }

      if (pending.position() > 0) {
        // A literal character ends the escape run; decode what was collected first.
        result.append(decode(pending, strictDecoder));
        pending.clear();
      }
      result.append(ch);
      index++;
    }

    // Decode an escape run that reaches the end of the path.
    if (pending.position() > 0) {
      result.append(decode(pending, strictDecoder));
      pending.clear();
    }

    return result.toString();
  }
Beispiel #12
0
  /**
   * Reads the major brand, the major brand version and the list of compatible brands from
   * {@code dataBuffer}, advancing its position past each field.
   *
   * <p>Brand names are decoded as ISO-8859-1. A brand that cannot be decoded is skipped, and
   * compatible-brand entries consisting of four NUL characters are not recorded.
   *
   * @throws CannotReadException declared by the signature; nothing in this method raises it
   *     directly
   */
  public void processData() throws CannotReadException {
    CharsetDecoder decoder = Charset.forName("ISO-8859-1").newDecoder();
    // Configure the decoder once, before the first use. The original set the malformed-input
    // action twice inside the loop and never set the unmappable-character action.
    decoder.onMalformedInput(CodingErrorAction.REPORT);
    decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    try {
      majorBrand =
          decoder.decode((ByteBuffer) dataBuffer.slice().limit(MAJOR_BRAND_LENGTH)).toString();
    } catch (CharacterCodingException ignored) {
      // Best effort: an undecodable major brand leaves majorBrand unchanged.
    }
    dataBuffer.position(dataBuffer.position() + MAJOR_BRAND_LENGTH);

    majorBrandVersion =
        Utils.getIntBE(
            dataBuffer,
            dataBuffer.position(),
            (dataBuffer.position() + MAJOR_BRAND_VERSION_LENGTH - 1));
    dataBuffer.position(dataBuffer.position() + MAJOR_BRAND_VERSION_LENGTH);

    while ((dataBuffer.position() < dataBuffer.limit())
        && (dataBuffer.limit() - dataBuffer.position() >= COMPATIBLE_BRAND_LENGTH)) {
      try {
        String brand =
            decoder
                .decode((ByteBuffer) dataBuffer.slice().limit(COMPATIBLE_BRAND_LENGTH))
                .toString();
        // Sometimes just extra groups of four nulls
        if (!brand.equals("\u0000\u0000\u0000\u0000")) {
          compatibleBrands.add(brand);
        }
      } catch (CharacterCodingException ignored) {
        // Best effort: skip a compatible brand that cannot be decoded.
      }
      dataBuffer.position(dataBuffer.position() + COMPATIBLE_BRAND_LENGTH);
    }
  }
Beispiel #13
0
 /**
  * Creates a coder whose decoder substitutes malformed or unmappable input instead of reporting
  * it.
  */
 public UTF8Coder() {
   decoder
       .onMalformedInput(CodingErrorAction.REPLACE)
       .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
Beispiel #14
0
 /**
  * Creates a stream that decodes written bytes with {@code charset} and forwards the characters
  * to {@code out}. Malformed and unmappable input is replaced rather than reported.
  *
  * @param out destination writer
  * @param charset charset used to decode the bytes written to this stream
  */
 public WriterOutputStream(Writer out, Charset charset) {
   this.writer = out;
   decoder =
       charset
           .newDecoder()
           .onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
 ByteToCharConverter(Charset charset, String encoding) {
   super(encoding);
   decoder = charset.newDecoder().onUnmappableCharacter(CodingErrorAction.REPLACE);
   // for compatibility to old ByteToCharASCII converter:
   if (charset.name().equals("US-ASCII")) decoder.onMalformedInput(CodingErrorAction.REPLACE);
 }
Beispiel #16
0
 /**
  * Creates a stream that decodes written bytes with {@code DEFAULT_CHARSET} and forwards the
  * characters to {@code out}. Malformed and unmappable input is replaced rather than reported.
  *
  * @param out destination writer
  */
 public WriterOutputStream(Writer out) {
   this.writer = out;
   decoder =
       DEFAULT_CHARSET
           .newDecoder()
           .onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
  /**
   * Splits a raw byte line into fields on a literal (non-regex) separator, working on bytes so
   * that each field can be decoded separately.
   *
   * <p>When {@code encoding} is {@code null}, the platform default charset is used for the
   * separator and for the whole-line fallbacks. Previously those fallbacks passed the null name
   * to {@code new String(byte[], String)} and failed with a {@code NullPointerException}.
   *
   * @param bline bytes of the line to split; {@code null} yields {@code null}
   * @param encoding charset name used for the separator and the fields, or {@code null} for the
   *     platform default
   * @param separatorChars literal separator text
   * @return the fields in order; a single-element array holding the whole line when the
   *     separator is empty or no field was produced
   * @throws UnsupportedEncodingException if {@code encoding} names an unsupported charset
   */
  public static String[] splitNotRegexWithEncoding(
      byte[] bline, String encoding, String separatorChars) throws UnsupportedEncodingException {
    if (bline == null) {
      return null;
    }

    ByteBuffer line = ByteBuffer.wrap(bline);

    byte[] sep;
    CharsetDecoder decoder = null;
    if (encoding != null) {
      sep = separatorChars.getBytes(encoding);

      decoder = Charset.forName(encoding).newDecoder();
      decoder.onMalformedInput(CodingErrorAction.REPORT);
      decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
    } else {
      sep = separatorChars.getBytes();
    }

    if (sep.length == 0) {
      // Nothing to split on: the whole line is the only field.
      return new String[] {encoding != null ? new String(bline, encoding) : new String(bline)};
    }

    CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); // $NON-NLS-1$
    utf8Decoder.onMalformedInput(CodingErrorAction.REPORT);
    utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPORT);

    ArrayList<String> substrings = new ArrayList<String>();

    int lineLength = line.limit();
    int sepCursor = 0; // number of separator bytes matched so far
    int fieldCursor = 0; // number of bytes collected for the current field
    byte[] fieldBytes = new byte[lineLength];
    while (line.position() < line.limit()) {
      if (sepCursor < sep.length) {
        byte currentByte = line.get();
        if (currentByte == sep[sepCursor]) {
          sepCursor++;
        } else {
          // NOTE(review): after a partial separator match, the bytes already matched are
          // dropped here rather than added to the field - confirm whether this is intended
          // for multi-byte separators.
          sepCursor = 0;
          fieldBytes[fieldCursor++] = currentByte;
        }
      } else {
        // A complete separator was matched: close the current field without consuming input.
        if (fieldCursor > 0) {
          substrings.add(
              newStringFromSplit(decoder, utf8Decoder, encoding, fieldBytes, fieldCursor));
          fieldCursor = 0;
        } else {
          // empty field
          substrings.add(""); // $NON-NLS-1$
        }
        sepCursor = 0;
      }
    }
    if (fieldCursor > 0) {
      substrings.add(newStringFromSplit(decoder, utf8Decoder, encoding, fieldBytes, fieldCursor));
    }
    if (sepCursor == sep.length) {
      // The line ended with a separator, so it has a trailing empty field.
      substrings.add(""); // $NON-NLS-1$
    }

    if (substrings.isEmpty()) {
      // No field was produced, so return the whole line as a single column.
      return new String[] {encoding != null ? new String(bline, encoding) : new String(bline)};
    }
    return substrings.toArray(new String[substrings.size()]);
  }