Ejemplo n.º 1
0
  /**
   * Creates a string in a specfied character set.
   *
   * @param value String constant, must not be null
   * @param charsetName Name of the character set, may be null
   * @param collation Collation, may be null
   * @throws IllegalCharsetNameException If the given charset name is illegal
   * @throws UnsupportedCharsetException If no support for the named charset is available in this
   *     instance of the Java virtual machine
   * @throws RuntimeException If the given value cannot be represented in the given charset
   */
  public NlsString(String value, String charsetName, SqlCollation collation) {
    assert value != null;
    if (null != charsetName) {
      charsetName = charsetName.toUpperCase();
      this.charsetName = charsetName;
      String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName);
      if (javaCharsetName == null) {
        throw new UnsupportedCharsetException(charsetName);
      }
      this.charset = Charset.forName(javaCharsetName);
      CharsetEncoder encoder = charset.newEncoder();

      // dry run to see if encoding hits any problems
      try {
        encoder.encode(CharBuffer.wrap(value));
      } catch (CharacterCodingException ex) {
        throw RESOURCE.charsetEncoding(value, javaCharsetName).ex();
      }
    } else {
      this.charsetName = null;
      this.charset = null;
    }
    this.collation = collation;
    this.value = value;
  }
Ejemplo n.º 2
0
 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   String csn = cs.name();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("ENC-----------------");
     int pos = cbf.position();
     System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff);
     throw new RuntimeException("Encoding err: " + csn);
   }
   byte[] bb = new byte[bbf.position()];
   bbf.flip();
   bbf.get(bb);
   return bb;
 }
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    List<Integer> cps = new ArrayList<>(0x10000);
    int off = 0;
    int cp = 0;
    while (cp < 0x10000) {
      if (enc.canEncode((char) cp)) {
        cps.add(cp);
      }
      cp++;
    }
    Collections.shuffle(cps);
    char[] bmpCA = new char[cps.size()];
    for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i);
    String bmpStr = new String(bmpCA);
    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());
  }
Ejemplo n.º 4
0
  /** Generic Decoder. */
  private static final class Generic extends TextDecoder {
    /** Input cache. */
    private final byte[] cache = new byte[4];
    /** Input buffer. */
    private final ByteBuffer inc = ByteBuffer.wrap(cache);
    /** Output buffer. */
    private final CharBuffer outc = CharBuffer.wrap(new char[4]);
    /** Charset decoder. */
    private final CharsetDecoder csd;

    /**
     * Constructor.
     *
     * @param enc encoding
     * @throws IOException I/O exception
     */
    private Generic(final String enc) throws IOException {
      try {
        csd = Charset.forName(enc).newDecoder();
      } catch (final Exception ex) {
        throw new EncodingException(ex);
      }
    }

    @Override
    int read(final TextInput ti) throws IOException {
      int c = -1;
      while (++c < 4) {
        final int ch = ti.readByte();
        if (ch < 0) break;
        cache[c] = (byte) ch;
        outc.position(0);
        inc.position(0);
        inc.limit(c + 1);
        csd.reset();
        final CoderResult cr = csd.decode(inc, outc, true);
        if (cr.isMalformed()) continue;
        // return character
        int i = 0;
        final int os = outc.position();
        for (int o = 0; o < os; ++o) i |= outc.get(o) << (o << 3);
        return i;
      }
      return c == 0 ? -1 : invalid();
    }
  }
Ejemplo n.º 5
0
 public static void main(String args[]) throws Exception {
   String s = "abc\uD800\uDC00qrst"; // Valid surrogate
   char[] c = s.toCharArray();
   CharsetEncoder enc =
       Charset.forName("ISO8859_1").newEncoder().onUnmappableCharacter(CodingErrorAction.REPLACE);
   /* Process the first 4 characters, including the high surrogate
   which should be stored */
   ByteBuffer bb = ByteBuffer.allocate(10);
   CharBuffer cb = CharBuffer.wrap(c);
   cb.limit(4);
   enc.encode(cb, bb, false);
   cb.limit(7);
   enc.encode(cb, bb, true);
   byte[] first = bb.array();
   for (int i = 0; i < 7; i++)
     System.err.printf("[%d]=%d was %d\n", i, (int) first[i] & 0xffff, (int) c[i] & 0xffff);
 }
Ejemplo n.º 6
0
 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   return cr;
 }
  /**
   * Writes any remaining output to the output buffer and resets the converter to its initial state.
   *
   * @param output char array to receive flushed output.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the output to be flushed contained a partial or invalid
   *     multibyte character sequence. flush will write what it can to the output buffer and reset
   *     the converter before throwing this exception. An additional call to flush is not required.
   * @exception ConversionBufferFullException if output array is filled before all the output can be
   *     flushed. flush will write what it can to the output buffer and remember its state. An
   *     additional call to flush with a new output buffer will conclude the operation.
   */
  public int flush(char[] output, int outStart, int outEnd)
      throws MalformedInputException, ConversionBufferFullException {

    byteOff = charOff = 0;
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr = null;
    try {
      if (src != null) cr = decoder.decode((ByteBuffer) src.clear(), dst, true);
      assert !cr.isUnmappable();
      if (cr.isMalformed()) {
        badInputLength = cr.length();
        reset();
        throw new MalformedInputException();
      }
    } catch (IllegalStateException ise) {
      if (src != null) cr = decoder.reset().decode(src, dst, true);
    }
    try {
      cr = decoder.flush(dst);
    } catch (Exception e) {
      assert false;
    } finally {
      byteOff = 0;
      charOff = dst.position();
      src = null;
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) {
      int written = charOff - outStart;
      reset();
      return written;
    }
    assert false;
    return -1; // should be never reached
  }
  /**
   * Converts an array of bytes containing characters in an external encoding into an array of
   * Unicode characters. This method allows a buffer by buffer conversion of a data stream. The
   * state of the conversion is saved between calls to convert. Among other things, this means
   * multibyte input sequences can be split between calls. If a call to convert results in an
   * exception, the conversion may be continued by calling convert again with suitably modified
   * parameters. All conversions should be finished with a call to the flush method.
   *
   * @return the number of bytes written to output.
   * @param input byte array containing text to be converted.
   * @param inStart begin conversion at this offset in input array.
   * @param inEnd stop conversion at this offset in input array (exclusive).
   * @param output character array to receive conversion result.
   * @param outStart start writing to output array at this offset.
   * @param outEnd stop writing to output array at this offset (exclusive).
   * @exception MalformedInputException if the input buffer contains any sequence of bytes that is
   *     illegal for the input character set.
   * @exception UnknownCharacterException for any character that that cannot be converted to
   *     Unicode. Thrown only when converter is not in substitution mode.
   * @exception ConversionBufferFullException if output array is filled prior to converting all the
   *     input.
   */
  public int convert(byte[] input, int inStart, int inEnd, char[] output, int outStart, int outEnd)
      throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException {

    byteOff = inStart;
    charOff = outStart;
    // throw exceptions compatible to legacy ByteToCharXxx converters
    if (inStart >= inEnd) return 0;
    if (inStart >= input.length) throw new ArrayIndexOutOfBoundsException(inStart);
    if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException();

    if (src != null && src.array() == input) src.position(inStart).limit(inEnd);
    else src = ByteBuffer.wrap(input, inStart, inEnd - inStart);
    if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd);
    else dst = CharBuffer.wrap(output, outStart, outEnd - outStart);

    CoderResult cr;
    try {
      cr = decoder.decode(src, dst, false);
    } catch (IllegalStateException ise) {
      cr = decoder.reset().decode(src, dst, false);
    } finally {
      byteOff = src.position();
      charOff = dst.position();
    }
    if (cr.isUnmappable()) {
      badInputLength = cr.length();
      throw new UnknownCharacterException();
    }
    if (cr.isMalformed()) {
      badInputLength = cr.length();
      throw new MalformedInputException();
    }
    if (cr.isOverflow()) throw new ConversionBufferFullException();

    // Return the length written to the output buffer
    if (cr.isUnderflow()) return charOff - outStart;
    return -1; // should be never reached
  }
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    String bmpStr = new String(bmpCA);
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // getBytes(csn);
    byte[] baSC = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

    // getBytes(cs);
    baSC = bmpStr.getBytes(cs);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(sbBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();

    if (!strNIO.equals(strSC))
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());

    // new String(cs);
    strSC = new String(sbBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());

    // encode unmappable surrogates
    if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) {
      if (cs.name().equals("UTF-8")
          || // utf8 handles surrogates
          cs.name().equals("CESU-8")) // utf8 handles surrogates
      return;
      enc.replaceWith(new byte[] {(byte) 'A'});
      sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc;

      String str = "ab\uD800\uDC00\uD800\uDC00cd";
      byte[] ba = new byte[str.length() - 2];
      int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
        throw new RuntimeException("encode1(surrogates) failed  -> " + cs.name());

      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode2(surrogates) failed  -> " + cs.name());
      str = "ab\uD800B\uDC00Bcd";
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode3(surrogates) failed  -> " + cs.name());
      /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
         invoker (StringCoder) allocates enough output buf, utf8
         and double-byte coder does not check the output buffer limit.
      ba = new byte[str.length() - 1];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
          throw new RuntimeException("encode4(surrogates) failed  -> "
                                     + cs.name());
      }
      */
    }
  }
Ejemplo n.º 10
0
 // we assume out is large enough for this conversion
 // returns number of filled chars in out buffer
 public int convert(byte[] in, int inOffset, int inLength, char[] out) {
   final ByteBuffer inBuffer = ByteBuffer.wrap(in, inOffset, inLength);
   final CharBuffer outBuffer = CharBuffer.wrap(out, 0, out.length);
   myDecoder.decode(inBuffer, outBuffer, false);
   return outBuffer.position();
 }