/**
 * Outputs {@code x} by delegating to {@code output(out, x)} using this
 * object's {@code out}.
 *
 * <p>An {@link IOException} raised by the delegate does not propagate as a
 * checked exception; it is handed to {@code IO.exception}.
 *
 * @param x the value passed through to {@code output(out, x)}
 */
public void output(int x) {
    try {
        output(out, x);
    } catch (IOException e) {
        // Route the checked exception through the shared IO error handler.
        IO.exception(e);
    }
}
/**
 * Decodes a byte array into a {@link String} by reading it through an
 * {@code InStreamUTF8} reader.
 *
 * <p>The reader is drained until it reports end-of-stream, because a single
 * {@link Reader#read(char[])} call is not guaranteed to fill the buffer. An
 * empty input yields the empty string (the reader reports {@code -1}
 * immediately, which must not be used as a length).
 *
 * @param bytes the bytes to decode; must not be {@code null}
 * @return the decoded string; {@code null} only if an {@link IOException}
 *         occurred and {@code IO.exception} returned normally
 */
public static String decode(byte[] bytes) {
    try {
        // Each decoded char consumes at least one input byte, so
        // bytes.length chars is always a large enough buffer.
        char[] chars = new char[bytes.length];
        InputStream in = new ByteArrayInputStream(bytes);
        Reader r = new InStreamUTF8(in);
        try {
            int total = 0;
            while (total < chars.length) {
                int n = r.read(chars, total, chars.length - total);
                if (n <= 0) {
                    // -1 is end-of-stream; 0 would mean no progress, so stop
                    // rather than spin.
                    break;
                }
                total += n;
            }
            return new String(chars, 0, total);
        } finally {
            // Close on both the normal and the exceptional path.
            IO.close(r);
        }
    } catch (IOException ex) {
        IO.exception(ex);
        return null;
    }
}
/** * Next codepoint, given the first byte of any UTF-8 byte sequence is already known. Not * necessarily a valid char (this function can be used as a straight UTF8 decoder). */ private static final int advance(InputStreamBuffered input, int x) { // count++ ; // ASCII Fastpath if (x == -1 || (x >= 0 && x <= 127)) { // count++ ; return x; } // 10 => extension byte // 110..... => 2 bytes if ((x & 0xE0) == 0xC0) { int ch = readMultiBytes(input, x & 0x1F, 2); // count += 2 ; return ch; } // 1110.... => 3 bytes : 16 bits : not outside 16bit chars if ((x & 0xF0) == 0xE0) { int ch = readMultiBytes(input, x & 0x0F, 3); // count += 3 ; // if ( ! Character.isDefined(ch) ) throw new // AtlasException(String.format("Undefined codepoint: 0x%04X", ch)) // ; return ch; } // Looking like 4 byte character. int ch = -2; // 11110zzz => 4 bytes. if ((x & 0xF8) == 0xF0) { ch = readMultiBytes(input, x & 0x08, 4); // Opps - need two returns. Character.toChars(ch, chars, 0) ; // count += 4 ; } else IO.exception(new IOException("Illegal UTF-8: " + x)); // This test will go off. We're processing a 4 byte sequence but Java // only supports 16 bit chars. if (ch > Character.MAX_VALUE) throw new AtlasException("Out of range character (must use a surrogate pair)"); if (!Character.isDefined(ch)) throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch)); return ch; }