/**
 * Writes {@code x} by delegating to {@code output(out, x)}.
 *
 * <p>An {@link IOException} raised by the delegate is not propagated as a checked exception;
 * it is handed to {@code IO.exception} instead.
 *
 * @param x the value to write
 */
public void output(int x) {
   try {
     output(out, x);
   } catch (IOException ioFailure) {
     // Checked I/O failures are routed through the shared IO helper.
     IO.exception(ioFailure);
   }
 }
Example #2
0
 /**
  * Decodes a byte array into a {@link String} by reading it through {@code InStreamUTF8}.
  *
  * <p>The reader is drained with repeated reads until end of stream (or until the buffer is
  * full), so a short read does not truncate the result, and an immediate end-of-stream yields
  * the empty string rather than a negative length. The reader is closed even if a read fails.
  *
  * @param bytes the bytes to decode
  * @return the decoded string; {@code null} only if an {@link IOException} occurred and
  *     {@code IO.exception} returned normally
  */
 public static String decode(byte[] bytes) {
   try {
     // Same sizing as before: the buffer holds one char per input byte.
     char[] chars = new char[bytes.length];
     InputStream in = new ByteArrayInputStream(bytes);
     Reader r = new InStreamUTF8(in);
     int len = 0;
     try {
       while (len < chars.length) {
         int n = r.read(chars, len, chars.length - len);
         // -1 signals end of stream; 0 is treated as "no progress" to avoid spinning.
         if (n <= 0) {
           break;
         }
         len += n;
       }
     } finally {
       IO.close(r);
     }
     return new String(chars, 0, len);
   } catch (IOException ex) {
     IO.exception(ex);
     return null;
   }
 }
Example #3
0
  /**
   * Next codepoint, given that the first byte of a UTF-8 byte sequence is already known. The
   * result is not necessarily a valid char (this function can be used as a straight UTF-8
   * decoder).
   *
   * <p>Continuation bytes are obtained through {@code readMultiBytes}. Two- and three-byte
   * sequences are returned as decoded, without further checks. Four-byte sequences are decoded
   * and then checked: a value above {@link Character#MAX_VALUE} is rejected because the result
   * is returned as a single value and no surrogate pair can be produced here.
   *
   * @param input source of the remaining bytes of the sequence
   * @param x the first byte of the sequence, or -1 (presumably end of input — confirm with
   *     callers)
   * @return {@code x} itself when it is -1 or ASCII (0..127); otherwise the decoded value
   * @throws AtlasException if a four-byte sequence decodes to a value above
   *     {@code Character.MAX_VALUE}, or the value is not a defined codepoint
   */
  private static final int advance(InputStreamBuffered input, int x) {
    // ASCII fast path; -1 is passed straight through.
    if (x == -1 || (x >= 0 && x <= 127)) {
      return x;
    }

    // 10...... => extension byte (not a legal first byte; ends up in the error branch below)
    // 110..... => 2 bytes
    if ((x & 0xE0) == 0xC0) {
      return readMultiBytes(input, x & 0x1F, 2);
    }
    // 1110.... => 3 bytes : 16 bits : not outside 16bit chars
    if ((x & 0xF0) == 0xE0) {
      return readMultiBytes(input, x & 0x0F, 3);
    }

    // Looking like a 4 byte character.
    int ch = -2;
    // 11110zzz => 4 bytes.
    if ((x & 0xF8) == 0xF0) {
      // The lead byte carries its payload in the low three bits (zzz), so the mask is 0x07.
      // Masking with 0x08 always produced 0 here, silently dropping the top bits.
      ch = readMultiBytes(input, x & 0x07, 4);
    } else {
      IO.exception(new IOException("Illegal UTF-8: " + x));
    }

    // This test will go off for real 4 byte sequences: the caller works with 16 bit chars
    // and only a single value can be returned from here.
    if (ch > Character.MAX_VALUE) {
      throw new AtlasException("Out of range character (must use a surrogate pair)");
    }
    // Also catches ch == -2 if IO.exception returned normally in the error branch above.
    if (!Character.isDefined(ch)) {
      throw new AtlasException(String.format("Undefined codepoint: 0x%04X", ch));
    }
    return ch;
  }