/**
 * Checks that every sample in {@code malformed} is reported as malformed input, with the
 * expected malformed length, when decoded through {@code decodeCR}.
 *
 * <p>Each sample is laid out as {@code [expectedLength, b0, b1, ...]}: the first element is the
 * malformed length the decoder is expected to report and the remaining elements are the bytes
 * to decode. Every sample is decoded twice, once with {@code direct=false} and once with
 * {@code direct=true}. Each mismatch is printed to standard output; all samples are processed
 * before the method fails.
 *
 * @param cs charset whose decoder is being checked
 * @param malformed samples in the layout described above
 * @throws RuntimeException if at least one sample was not reported as expected
 * @throws Exception whatever {@code decodeCR} propagates
 */
static void checkMalformed(Charset cs, byte[][] malformed) throws Exception {
  final String charsetName = cs.name();
  System.out.printf("Check malformed <%s>...%n", charsetName);

  boolean anyFailure = false;
  final boolean[] directFlags = {false, true};
  for (boolean direct : directFlags) {
    for (byte[] sample : malformed) {
      final int expectedLength = sample[0];
      final byte[] payload = Arrays.copyOfRange(sample, 1, sample.length);
      final CoderResult result = decodeCR(payload, cs, direct);

      // Space-separated, unpadded hex rendering of the payload for the failure report.
      final StringBuilder hex = new StringBuilder();
      for (byte b : payload) {
        if (hex.length() > 0) {
          hex.append(" ");
        }
        hex.append(Integer.toString((int) b & 0xff, 16));
      }
      final String payloadHex = hex.toString();

      if (result.isMalformed()) {
        if (result.length() != expectedLength) {
          System.out.printf(
              " FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, payloadHex, result.length());
          anyFailure = true;
        }
      } else {
        System.out.printf(
            " FAIL(direct=%b): [%s] not malformed. -->cr=%s\n",
            direct, payloadHex, result.toString());
        anyFailure = true;
      }
    }
  }

  if (anyFailure) {
    throw new RuntimeException("Check malformed failed " + charsetName);
  }
}
/**
 * Encodes {@code cc} with a new encoder of {@code cs} and returns the bytes produced.
 *
 * <p>The encode pass is repeated {@code iteration} times over the same buffers; the elapsed
 * time divided by {@code iteration} (in microseconds, derived from {@code System.nanoTime()})
 * is stored in {@code t.t}. Only the result of the last pass is inspected and returned.
 *
 * @param cc characters to encode
 * @param cs charset supplying the encoder
 * @param testDirect {@code true} to run on direct buffers, {@code false} for heap buffers
 * @param t receives the per-pass time in its {@code t} field
 * @return the bytes written by the last encode pass
 * @throws RuntimeException if the last pass did not finish with {@code CoderResult.UNDERFLOW}
 */
static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
  final CharsetEncoder encoder = cs.newEncoder();

  final ByteBuffer target;
  final CharBuffer source;
  if (!testDirect) {
    target = ByteBuffer.allocate(cc.length * 4);
    source = CharBuffer.wrap(cc);
  } else {
    target = ByteBuffer.allocateDirect(cc.length * 4);
    source = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
    source.put(cc);
    source.flip();
  }

  CoderResult result = null;
  final long startMicros = System.nanoTime() / 1000;
  for (int pass = 0; pass < iteration; pass++) {
    source.rewind();
    target.clear();
    encoder.reset();
    result = encoder.encode(source, target, true);
  }
  final long endMicros = System.nanoTime() / 1000;
  t.t = (endMicros - startMicros) / iteration;

  if (result != CoderResult.UNDERFLOW) {
    // Report where the encoder stopped and which char it stopped on.
    System.out.println("ENC-----------------");
    final int stoppedAt = source.position();
    System.out.printf(
        " cr=%s, cbf.pos=%d, cc[pos]=%x%n", result.toString(), stoppedAt, cc[stoppedAt] & 0xffff);
    throw new RuntimeException("Encoding err: " + cs.name());
  }

  // After flip(), remaining() equals the number of bytes the encoder wrote.
  target.flip();
  final byte[] encoded = new byte[target.remaining()];
  target.get(encoded);
  return encoded;
}
/**
 * Decodes {@code bb} with a new decoder of {@code cs} and returns the chars produced.
 *
 * <p>The decode pass is repeated {@code iteration} times over the same buffers; the elapsed
 * time divided by {@code iteration} (in microseconds, derived from {@code System.nanoTime()})
 * is stored in {@code t.t}. Only the result of the last pass is inspected and returned.
 *
 * @param bb bytes to decode
 * @param cs charset supplying the decoder
 * @param testDirect {@code true} to run on direct buffers, {@code false} for heap buffers
 * @param t receives the per-pass time in its {@code t} field
 * @return the chars written by the last decode pass
 * @throws RuntimeException if the last pass did not finish with {@code CoderResult.UNDERFLOW}
 */
static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception {
  String csn = cs.name();
  CharsetDecoder dec = cs.newDecoder();
  ByteBuffer bbf;
  CharBuffer cbf;
  if (testDirect) {
    bbf = ByteBuffer.allocateDirect(bb.length);
    cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
    // No flip() needed: the buffer is filled to capacity, so the limit already equals
    // bb.length and the rewind() at the start of each pass exposes the whole content.
    bbf.put(bb);
  } else {
    bbf = ByteBuffer.wrap(bb);
    cbf = CharBuffer.allocate(bb.length);
  }

  CoderResult cr = null;
  long t1 = System.nanoTime() / 1000;
  for (int i = 0; i < iteration; i++) {
    bbf.rewind();
    cbf.clear();
    dec.reset();
    cr = dec.decode(bbf, cbf, true);
  }
  long t2 = System.nanoTime() / 1000;
  t.t = (t2 - t1) / iteration;

  if (cr != CoderResult.UNDERFLOW) {
    System.out.println("DEC-----------------");
    int pos = bbf.position();
    // Dump up to four bytes starting at the failing position. The decoder frequently stops
    // within the last three bytes (e.g. a truncated multibyte sequence), so reading four
    // bytes unconditionally would throw ArrayIndexOutOfBoundsException and hide the real
    // decoding failure. With four bytes available the output is unchanged.
    StringBuilder hex = new StringBuilder();
    int end = Math.min(pos + 4, bb.length);
    for (int i = pos; i < end; i++) {
      if (i > pos) {
        hex.append(',');
      }
      hex.append(Integer.toHexString(bb[i] & 0xff));
    }
    System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%s%n", cr.toString(), pos, hex);
    throw new RuntimeException("Decoding err: " + csn);
  }

  char[] cc = new char[cbf.position()];
  cbf.flip();
  cbf.get(cc);
  return cc;
}
@Override int read(final TextInput ti) throws IOException { int c = -1; while (++c < 4) { final int ch = ti.readByte(); if (ch < 0) break; cache[c] = (byte) ch; outc.position(0); inc.position(0); inc.limit(c + 1); csd.reset(); final CoderResult cr = csd.decode(inc, outc, true); if (cr.isMalformed()) continue; // return character int i = 0; final int os = outc.position(); for (int o = 0; o < os; ++o) i |= outc.get(o) << (o << 3); return i; } return c == 0 ? -1 : invalid(); }
/** * Converts an array of bytes containing characters in an external encoding into an array of * Unicode characters. This method allows a buffer by buffer conversion of a data stream. The * state of the conversion is saved between calls to convert. Among other things, this means * multibyte input sequences can be split between calls. If a call to convert results in an * exception, the conversion may be continued by calling convert again with suitably modified * parameters. All conversions should be finished with a call to the flush method. * * @return the number of bytes written to output. * @param input byte array containing text to be converted. * @param inStart begin conversion at this offset in input array. * @param inEnd stop conversion at this offset in input array (exclusive). * @param output character array to receive conversion result. * @param outStart start writing to output array at this offset. * @param outEnd stop writing to output array at this offset (exclusive). * @exception MalformedInputException if the input buffer contains any sequence of bytes that is * illegal for the input character set. * @exception UnknownCharacterException for any character that that cannot be converted to * Unicode. Thrown only when converter is not in substitution mode. * @exception ConversionBufferFullException if output array is filled prior to converting all the * input. 
*/ public int convert(byte[] input, int inStart, int inEnd, char[] output, int outStart, int outEnd) throws UnknownCharacterException, MalformedInputException, ConversionBufferFullException { byteOff = inStart; charOff = outStart; // throw exceptions compatible to legacy ByteToCharXxx converters if (inStart >= inEnd) return 0; if (inStart >= input.length) throw new ArrayIndexOutOfBoundsException(inStart); if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException(); if (src != null && src.array() == input) src.position(inStart).limit(inEnd); else src = ByteBuffer.wrap(input, inStart, inEnd - inStart); if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd); else dst = CharBuffer.wrap(output, outStart, outEnd - outStart); CoderResult cr; try { cr = decoder.decode(src, dst, false); } catch (IllegalStateException ise) { cr = decoder.reset().decode(src, dst, false); } finally { byteOff = src.position(); charOff = dst.position(); } if (cr.isUnmappable()) { badInputLength = cr.length(); throw new UnknownCharacterException(); } if (cr.isMalformed()) { badInputLength = cr.length(); throw new MalformedInputException(); } if (cr.isOverflow()) throw new ConversionBufferFullException(); // Return the length written to the output buffer if (cr.isUnderflow()) return charOff - outStart; return -1; // should be never reached }
/** * Writes any remaining output to the output buffer and resets the converter to its initial state. * * @param output char array to receive flushed output. * @param outStart start writing to output array at this offset. * @param outEnd stop writing to output array at this offset (exclusive). * @exception MalformedInputException if the output to be flushed contained a partial or invalid * multibyte character sequence. flush will write what it can to the output buffer and reset * the converter before throwing this exception. An additional call to flush is not required. * @exception ConversionBufferFullException if output array is filled before all the output can be * flushed. flush will write what it can to the output buffer and remember its state. An * additional call to flush with a new output buffer will conclude the operation. */ public int flush(char[] output, int outStart, int outEnd) throws MalformedInputException, ConversionBufferFullException { byteOff = charOff = 0; if (outStart >= outEnd || outStart >= output.length) throw new ConversionBufferFullException(); if (dst != null && dst.array() == output) dst.position(outStart).limit(outEnd); else dst = CharBuffer.wrap(output, outStart, outEnd - outStart); CoderResult cr = null; try { if (src != null) cr = decoder.decode((ByteBuffer) src.clear(), dst, true); assert !cr.isUnmappable(); if (cr.isMalformed()) { badInputLength = cr.length(); reset(); throw new MalformedInputException(); } } catch (IllegalStateException ise) { if (src != null) cr = decoder.reset().decode(src, dst, true); } try { cr = decoder.flush(dst); } catch (Exception e) { assert false; } finally { byteOff = 0; charOff = dst.position(); src = null; } if (cr.isOverflow()) throw new ConversionBufferFullException(); // Return the length written to the output buffer if (cr.isUnderflow()) { int written = charOff - outStart; reset(); return written; } assert false; return -1; // should be never reached }