/** * Creates a string in a specfied character set. * * @param value String constant, must not be null * @param charsetName Name of the character set, may be null * @param collation Collation, may be null * @throws IllegalCharsetNameException If the given charset name is illegal * @throws UnsupportedCharsetException If no support for the named charset is available in this * instance of the Java virtual machine * @throws RuntimeException If the given value cannot be represented in the given charset */ public NlsString(String value, String charsetName, SqlCollation collation) { assert value != null; if (null != charsetName) { charsetName = charsetName.toUpperCase(); this.charsetName = charsetName; String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName); if (javaCharsetName == null) { throw new UnsupportedCharsetException(charsetName); } this.charset = Charset.forName(javaCharsetName); CharsetEncoder encoder = charset.newEncoder(); // dry run to see if encoding hits any problems try { encoder.encode(CharBuffer.wrap(value)); } catch (CharacterCodingException ex) { throw RESOURCE.charsetEncoding(value, javaCharsetName).ex(); } } else { this.charsetName = null; this.charset = null; } this.collation = collation; this.value = value; }
static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); String csn = cs.name(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } CoderResult cr = null; long t1 = System.nanoTime() / 1000; for (int i = 0; i < iteration; i++) { cbf.rewind(); bbf.clear(); enc.reset(); cr = enc.encode(cbf, bbf, true); } long t2 = System.nanoTime() / 1000; t.t = (t2 - t1) / iteration; if (cr != CoderResult.UNDERFLOW) { System.out.println("ENC-----------------"); int pos = cbf.position(); System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff); throw new RuntimeException("Encoding err: " + csn); } byte[] bb = new byte[bbf.position()]; bbf.flip(); bbf.get(bb); return bb; }
public static void main(String[] arguments) { try { // read byte data into a byte buffer String data = "friends.dat"; FileInputStream inData = new FileInputStream(data); FileChannel inChannel = inData.getChannel(); long inSize = inChannel.size(); ByteBuffer source = ByteBuffer.allocate((int) inSize); inChannel.read(source, 0); source.position(0); System.out.println("Original byte data:"); for (int i = 0; source.remaining() > 0; i++) { System.out.print(source.get() + " "); } // convert byte data into character data source.position(0); Charset ascii = Charset.forName("US-ASCII"); CharsetDecoder toAscii = ascii.newDecoder(); CharBuffer destination = toAscii.decode(source); destination.position(0); System.out.println("\n\nNew character data:"); for (int i = 0; destination.remaining() > 0; i++) { System.out.print(destination.get()); } System.out.println(); } catch (FileNotFoundException fne) { System.out.println(fne.getMessage()); } catch (IOException ioe) { System.out.println(ioe.getMessage()); } }
public static void main(String args[]) throws Exception { String inputFile = "samplein.txt"; String outputFile = "sampleout.txt"; RandomAccessFile inf = new RandomAccessFile(inputFile, "r"); RandomAccessFile outf = new RandomAccessFile(outputFile, "rw"); long inputLength = new File(inputFile).length(); FileChannel inc = inf.getChannel(); FileChannel outc = outf.getChannel(); MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength); Charset latin1 = Charset.forName("ISO-8859-1"); CharsetDecoder decoder = latin1.newDecoder(); CharsetEncoder encoder = latin1.newEncoder(); CharBuffer cb = decoder.decode(inputData); // Process char data here ByteBuffer outputData = encoder.encode(cb); outc.write(outputData); inf.close(); outf.close(); }
// check and compare canEncoding/Encoding static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception { System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name()); CharsetEncoder encOLD = oldCS.newEncoder(); CharsetEncoder encNew = newCS.newEncoder(); char[] cc = new char[0x10000]; int pos = 0; boolean is970 = "x-IBM970-Old".equals(oldCS.name()); for (char c = 0; c < 0xffff; c++) { boolean canOld = encOLD.canEncode(c); boolean canNew = encNew.canEncode(c); if (is970 && c == 0x2299) continue; if (canOld != canNew) { if (canNew) { System.out.printf(" NEW(only): "); printEntry(c, newCS); } else { if (is970) { byte[] bb = new String(new char[] {c}).getBytes(oldCS); if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) { // we know 970 has bogus nnnn -> a2c1 -> 2299 continue; } } System.out.printf(" OLD(only): "); printEntry(c, oldCS); } } else if (canNew) { byte[] bbNew = new String(new char[] {c}).getBytes(newCS); byte[] bbOld = new String(new char[] {c}).getBytes(oldCS); if (!Arrays.equals(bbNew, bbOld)) { System.out.printf(" c->b NEW: "); printEntry(c, newCS); System.out.printf(" c->b OLD: "); printEntry(c, oldCS); } else { String sNew = new String(bbNew, newCS); String sOld = new String(bbOld, oldCS); if (!sNew.equals(sOld)) { System.out.printf(" b2c NEW (c=%x):", c & 0xffff); printEntry(sNew.charAt(0), newCS); System.out.printf(" b2c OLD:"); printEntry(sOld.charAt(0), oldCS); } } } if (canNew & canOld) { // added only both for now cc[pos++] = c; } } return Arrays.copyOf(cc, pos); }
static void checkUnderOverflow(Charset cs) throws Exception { String csn = cs.name(); System.out.printf("Check under/overflow <%s>...%n", csn); CharsetDecoder dec = cs.newDecoder(); boolean failed = false; // 7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1 // 0 1 2 3 7 11 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW"); int inlen = bytes.length; int MAXOFF = 20; for (int inoff = 0; inoff < MAXOFF; inoff++) { for (int outoff = 0; outoff < MAXOFF; outoff++) { int[][] Flows = { // inpos, inLen, outPos, outLen, inPosEP, outposEP, under(0)/over(1) // overflow {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1}, {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1}, {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1}, {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1}, {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1}, {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0}, // underflow {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0}, {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0}, {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0}, {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0}, {inoff, 11, outoff, 6, inoff + 11, outoff + 4, 0}, {inoff, 12, outoff, 6, inoff + 11, outoff + 4, 0}, {inoff, 15, outoff, 6, inoff + 15, outoff + 6, 0}, // 2-byte under/overflow {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0}, {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1}, {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0}, }; for (boolean direct : new boolean[] {false, true}) { for (int[] flow : Flows) { if (!check(dec, bytes, direct, flow)) failed = true; } } } } if (failed) throw new RuntimeException("Check under/overflow failed " + csn); }
static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception { System.gc(); // enqueue finalizable objects Thread.sleep(1000); System.gc(); // enqueue finalizable objects String csn1 = cs1.name(); String csn2 = cs2.name(); System.out.printf("Diff <%s> <%s>...%n", csn1, csn2); Time t1 = new Time(); Time t2 = new Time(); byte[] bb1 = encode(cc, cs1, false, t1); byte[] bb2 = encode(cc, cs2, false, t2); System.out.printf( " Encoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(bb1, bb2)) { System.out.printf(" encoding failed%n"); } char[] cc2 = decode(bb1, cs2, false, t2); char[] cc1 = decode(bb1, cs1, false, t1); System.out.printf( " Decoding TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding failed%n"); } bb1 = encode(cc, cs1, true, t1); bb2 = encode(cc, cs2, true, t2); System.out.printf( " Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(bb1, bb2)) System.out.printf(" encoding (direct) failed%n"); cc1 = decode(bb1, cs1, true, t1); cc2 = decode(bb1, cs2, true, t2); System.out.printf( " Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n", csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t)); if (!Arrays.equals(cc1, cc2)) { System.out.printf(" decoding (direct) failed%n"); } }
// Check if DatagramChannel.send while connected can include // address without throwing private static void test1() throws Exception { DatagramChannel sndChannel = DatagramChannel.open(); sndChannel.socket().bind(null); InetSocketAddress sender = new InetSocketAddress(InetAddress.getLocalHost(), sndChannel.socket().getLocalPort()); DatagramChannel rcvChannel = DatagramChannel.open(); rcvChannel.socket().bind(null); InetSocketAddress receiver = new InetSocketAddress(InetAddress.getLocalHost(), rcvChannel.socket().getLocalPort()); rcvChannel.connect(sender); sndChannel.connect(receiver); ByteBuffer bb = ByteBuffer.allocate(256); bb.put("hello".getBytes()); bb.flip(); int sent = sndChannel.send(bb, receiver); bb.clear(); rcvChannel.receive(bb); bb.flip(); CharBuffer cb = Charset.forName("US-ASCII").newDecoder().decode(bb); if (!cb.toString().startsWith("h")) throw new RuntimeException("Test failed"); rcvChannel.close(); sndChannel.close(); }
// Check if the datagramsocket adaptor can send with a packet // that has not been initialized with an address; the legacy // datagram socket will send in this case private static void test2() throws Exception { DatagramChannel sndChannel = DatagramChannel.open(); sndChannel.socket().bind(null); InetSocketAddress sender = new InetSocketAddress(InetAddress.getLocalHost(), sndChannel.socket().getLocalPort()); DatagramChannel rcvChannel = DatagramChannel.open(); rcvChannel.socket().bind(null); InetSocketAddress receiver = new InetSocketAddress(InetAddress.getLocalHost(), rcvChannel.socket().getLocalPort()); rcvChannel.connect(sender); sndChannel.connect(receiver); byte b[] = "hello".getBytes("UTF-8"); DatagramPacket pkt = new DatagramPacket(b, b.length); sndChannel.socket().send(pkt); ByteBuffer bb = ByteBuffer.allocate(256); rcvChannel.receive(bb); bb.flip(); CharBuffer cb = Charset.forName("US-ASCII").newDecoder().decode(bb); if (!cb.toString().startsWith("h")) throw new RuntimeException("Test failed"); // Check that the pkt got set with the target address; // This is legacy behavior if (!pkt.getSocketAddress().equals(receiver)) throw new RuntimeException("Test failed"); rcvChannel.close(); sndChannel.close(); }
/** * Constructor. * * @param enc encoding * @throws IOException I/O exception */ private Generic(final String enc) throws IOException { try { csd = Charset.forName(enc).newDecoder(); } catch (final Exception ex) { throw new EncodingException(ex); } }
public static void main(String[] args) throws Exception { for (String csname : csnames) { System.out.printf("-----------------------------------%n"); String oldname = csname + "_OLD"; checkInit(csname); Charset csOld = (Charset) Class.forName(oldname).newInstance(); Charset csNew = Charset.forName(csname); char[] cc = checkEncoding(csOld, csNew); checkDecoding(csOld, csNew); compare(csNew, csOld, cc); if (csname.startsWith("x-IBM93")) { // ecdbic checkMalformed( csNew, new byte[][] { {1, 0x26, 0x0f, 0x27}, // in SBSC, no SI {1, 0x0e, 0x41, 0x41, 0xe}, // in DBSC, no SO {2, 0x0e, 0x40, 0x41, 0xe}, // illegal DB }); } else if (csname.equals("x-IBM970") || csname.equals("x-IBM1383")) { // euc_simple checkMalformed( csNew, new byte[][] { {1, 0x26, (byte) 0x8f, 0x27}, // SS2 {1, (byte) 0xa1, (byte) 0xa1, (byte) 0x8e, 0x51}, // SS3 }); } } }
static void checkMalformed(Charset cs, byte[][] malformed) throws Exception { boolean failed = false; String csn = cs.name(); System.out.printf("Check malformed <%s>...%n", csn); for (boolean direct : new boolean[] {false, true}) { for (byte[] bins : malformed) { int mlen = bins[0]; byte[] bin = Arrays.copyOfRange(bins, 1, bins.length); CoderResult cr = decodeCR(bin, cs, direct); String ashex = ""; for (int i = 0; i < bin.length; i++) { if (i > 0) ashex += " "; ashex += Integer.toString((int) bin[i] & 0xff, 16); } if (!cr.isMalformed()) { System.out.printf( " FAIL(direct=%b): [%s] not malformed. -->cr=%s\n", direct, ashex, cr.toString()); failed = true; } else if (cr.length() != mlen) { System.out.printf( " FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length()); failed = true; } } } if (failed) throw new RuntimeException("Check malformed failed " + csn); }
EncodingConverter(String encoding) { Name = encoding; myDecoder = Charset.forName(encoding) .newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); }
static void checkInit(String csn) throws Exception { System.out.printf("Check init <%s>...%n", csn); Charset.forName("Big5"); // load in the ExtendedCharsets long t1 = System.nanoTime() / 1000; Charset cs = Charset.forName(csn); long t2 = System.nanoTime() / 1000; System.out.printf(" charset :%d%n", t2 - t1); t1 = System.nanoTime() / 1000; cs.newDecoder(); t2 = System.nanoTime() / 1000; System.out.printf(" new Decoder :%d%n", t2 - t1); t1 = System.nanoTime() / 1000; cs.newEncoder(); t2 = System.nanoTime() / 1000; System.out.printf(" new Encoder :%d%n", t2 - t1); }
private void recv(SocketChannel sc, ClientInfo ci) throws IOException { ci.channel.read(ci.inBuf); ByteBuffer tmpBuf = ci.inBuf.duplicate(); tmpBuf.flip(); int bytesProcessed = 0; boolean doneLoop = false; while (!doneLoop) { byte b; try { b = tmpBuf.get(); } catch (BufferUnderflowException bue) { // Processed all data in buffer ci.inBuf.clear(); doneLoop = true; break; } switch (b) { case TypeServerConstants.WELCOME: bytesProcessed++; break; case TypeServerConstants.GET_STRING_REQUEST: bytesProcessed++; if (ci.outputPending) { // Client is backed up. We can't append to // the byte buffer because it's in the wrong // state. We could allocate another buffer // here and change our send method to know // about multiple buffers, but we'll just // assume that the client is dead break; } ci.outBuf.put(TypeServerConstants.GET_STRING_RESPONSE); ByteBuffer strBuf = encoder.encode(testString); ci.outBuf.putShort((short) strBuf.remaining()); ci.outBuf.put(strBuf); ci.outBuf.flip(); send(sc, ci); break; case TypeServerConstants.GET_STRING_RESPONSE: int startPos = tmpBuf.position(); try { int nBytes = tmpBuf.getInt(); byte[] buf = new byte[nBytes]; tmpBuf.get(buf); bytesProcessed += buf.length + 5; String s = new String(buf); // Send the string to the GUI break; } catch (BufferUnderflowException bue) { // Processed all available data ci.inBuf.position(ci.inBuf.position() + bytesProcessed); doneLoop = true; } break; } } }
static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception { String csn = cs.name(); CharsetDecoder dec = cs.newDecoder(); ByteBuffer bbf; CharBuffer cbf; if (testDirect) { bbf = ByteBuffer.allocateDirect(bb.length); cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer(); bbf.put(bb); } else { bbf = ByteBuffer.wrap(bb); cbf = CharBuffer.allocate(bb.length); } CoderResult cr = null; long t1 = System.nanoTime() / 1000; for (int i = 0; i < iteration; i++) { bbf.rewind(); cbf.clear(); dec.reset(); cr = dec.decode(bbf, cbf, true); } long t2 = System.nanoTime() / 1000; t.t = (t2 - t1) / iteration; if (cr != CoderResult.UNDERFLOW) { System.out.println("DEC-----------------"); int pos = bbf.position(); System.out.printf( " cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n", cr.toString(), pos, bb[pos++] & 0xff, bb[pos++] & 0xff, bb[pos++] & 0xff, bb[pos++] & 0xff); throw new RuntimeException("Decoding err: " + csn); } char[] cc = new char[cbf.position()]; cbf.flip(); cbf.get(cc); return cc; }
public static void main(String[] args) throws Exception { // 创建简体中文对应的Charset Charset cn = Charset.forName("GBK"); // 获取cn对象对应的编码器和解码器 CharsetEncoder cnEncoder = cn.newEncoder(); CharsetDecoder cnDecoder = cn.newDecoder(); // 创建一个CharBuffer对象 CharBuffer cbuff = CharBuffer.allocate(8); cbuff.put('孙'); cbuff.put('悟'); cbuff.put('空'); cbuff.flip(); // 将CharBuffer中的字符序列转换成字节序列 ByteBuffer bbuff = cnEncoder.encode(cbuff); // 循环访问ByteBuffer中的每个字节 for (int i = 0; i < bbuff.capacity(); i++) { System.out.print(bbuff.get(i) + " "); } // 将ByteBuffer的数据解码成字符序列 System.out.println("\n" + cnDecoder.decode(bbuff)); }
/** * Get text content of a source file. * * @param path The canonical path of source file. * @param charset Source file encoding. * @return Source file content. */ public String read(String path, String charset) { String str = null; byte[] bin = read(path); try { str = Charset.forName(charset).newDecoder().decode(ByteBuffer.wrap(bin)).toString(); } catch (CharacterCodingException e) { App.exit("Cannot read " + path + " as " + charset + " encoded file"); } return str; }
public static void main(String[] args) throws Throwable { for (Boolean hasSM : new boolean[] {false, true}) { if (hasSM) System.setSecurityManager(new PermissiveSecurityManger()); for (Charset cs : Charset.availableCharsets().values()) { if ("ISO-2022-CN".equals(cs.name()) || "x-COMPOUND_TEXT".equals(cs.name()) || "x-JISAutoDetect".equals(cs.name())) continue; System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM); // full bmp first char[] bmpCA = new char[0x10000]; for (int i = 0; i < 0x10000; i++) { bmpCA[i] = (char) i; } byte[] sbBA = new byte[0x100]; for (int i = 0; i < 0x100; i++) { sbBA[i] = (byte) i; } test(cs, bmpCA, sbBA); // "randomed" sizes Random rnd = new Random(); for (int i = 0; i < 10; i++) { int clen = rnd.nextInt(0x10000); int blen = rnd.nextInt(0x100); // System.out.printf(" blen=%d, clen=%d%n", blen, clen); test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen)); // add a pair of surrogates int pos = clen / 2; if ((pos + 1) < blen) { bmpCA[pos] = '\uD800'; bmpCA[pos + 1] = '\uDC00'; } test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen)); } testMixed(cs); System.out.println("done!"); } } }
/** * Decode file charset. * * @param f File to process. * @return File charset. * @throws IOException in case of error. */ public static Charset decode(File f) throws IOException { SortedMap<String, Charset> charsets = Charset.availableCharsets(); String[] firstCharsets = { Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE" }; Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size()); for (String c : firstCharsets) if (charsets.containsKey(c)) orderedCharsets.add(charsets.get(c)); orderedCharsets.addAll(charsets.values()); try (RandomAccessFile raf = new RandomAccessFile(f, "r")) { FileChannel ch = raf.getChannel(); ByteBuffer buf = ByteBuffer.allocate(4096); ch.read(buf); buf.flip(); for (Charset charset : orderedCharsets) { CharsetDecoder decoder = charset.newDecoder(); decoder.reset(); try { decoder.decode(buf); return charset; } catch (CharacterCodingException ignored) { } } } return Charset.defaultCharset(); }
/** * Check if the first X characters of a byte stream match a String. * * @param data The byte array to process * @param pattern The String to match * @return True if the pattern was found, false otherwise */ private static boolean bytesEqualsString(byte[] data, String pattern) { byte[] bytes = new byte[pattern.length()]; Charset csets = Charset.forName("US-ASCII"); boolean fin = false; int currChar = 0; // remove any CR and/or LF characters at the beginning of the article // data while (!fin) { if (currChar >= data.length) break; byte in = data[currChar]; ByteBuffer bb = ByteBuffer.wrap(new byte[] {(byte) in}); CharBuffer cb = csets.decode(bb); char c = cb.charAt(0); if (data.length > 0 && (c == '\n' || c == '\r')) currChar++; else fin = true; if (data.length == 0) fin = true; } // extract bytes (chars) to check from article data for (int i = 0; i < bytes.length && i < data.length; i++, currChar++) { byte in = data[currChar]; bytes[i] = (byte) in; } // decode byte data to characters ByteBuffer bb = ByteBuffer.wrap(bytes); CharBuffer cb = csets.decode(bb); // compare these characters to the pattern String for (int i = 0; i < pattern.length(); i++) if (cb.charAt(i) != pattern.charAt(i)) return false; return true; }
public static void main(String args[]) throws Exception { String s = "abc\uD800\uDC00qrst"; // Valid surrogate char[] c = s.toCharArray(); CharsetEncoder enc = Charset.forName("ISO8859_1").newEncoder().onUnmappableCharacter(CodingErrorAction.REPLACE); /* Process the first 4 characters, including the high surrogate which should be stored */ ByteBuffer bb = ByteBuffer.allocate(10); CharBuffer cb = CharBuffer.wrap(c); cb.limit(4); enc.encode(cb, bb, false); cb.limit(7); enc.encode(cb, bb, true); byte[] first = bb.array(); for (int i = 0; i < 7; i++) System.err.printf("[%d]=%d was %d\n", i, (int) first[i] & 0xffff, (int) c[i] & 0xffff); }
static void testMixed(Charset cs) throws Throwable { CharsetDecoder dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); CharsetEncoder enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); List<Integer> cps = new ArrayList<>(0x10000); int off = 0; int cp = 0; while (cp < 0x10000) { if (enc.canEncode((char) cp)) { cps.add(cp); } cp++; } Collections.shuffle(cps); char[] bmpCA = new char[cps.size()]; for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i); String bmpStr = new String(bmpCA); // getBytes(csn); byte[] bmpBA = bmpStr.getBytes(cs.name()); ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA)); byte[] baNIO = new byte[bf.limit()]; bf.get(baNIO, 0, baNIO.length); if (!Arrays.equals(bmpBA, baNIO)) { throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); } // getBytes(cs); bmpBA = bmpStr.getBytes(cs); if (!Arrays.equals(bmpBA, baNIO)) throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); // new String(csn); String strSC = new String(bmpBA, cs.name()); String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString(); if (!strNIO.equals(strSC)) { throw new RuntimeException("new String(csn) failed -> " + cs.name()); } // new String(cs); strSC = new String(bmpBA, cs); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed -> " + cs.name()); }
static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } CoderResult cr = null; for (int i = 0; i < iteration; i++) { cbf.rewind(); bbf.clear(); enc.reset(); cr = enc.encode(cbf, bbf, true); } return cr; }
static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) throws Exception { CharsetDecoder dec = cs.newDecoder(); ByteBuffer bbf; CharBuffer cbf; if (testDirect) { bbf = ByteBuffer.allocateDirect(bb.length); cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer(); bbf.put(bb).flip(); } else { bbf = ByteBuffer.wrap(bb); cbf = CharBuffer.allocate(bb.length); } CoderResult cr = null; for (int i = 0; i < iteration; i++) { bbf.rewind(); cbf.clear(); dec.reset(); cr = dec.decode(bbf, cbf, true); } return cr; }
// 感觉如果没有看过 NIO 的详细内容是不会懂这部分 public class TypeServer extends TCPNIOServer { static String testString = "Thisisateststring"; static class ClientInfo { ByteBuffer inBuf = ByteBuffer.allocateDirect(512); ByteBuffer outBuf = ByteBuffer.allocateDirect(512); boolean outputPending = false; SocketChannel channel; } Map allClients = new HashMap(); Charset encoder = Charset.forName("UTF-8"); protected void handleClient(SelectionKey key) throws IOException { SocketChannel sc = (SocketChannel) key.channel(); ClientInfo ci = (ClientInfo) allClients.get(sc); if (ci == null) throw new IllegalStateException("Unknown client"); if (key.isWritable()) send(sc, ci); if (key.isReadable()) // 从一个客户端读进所有的可用数据 recv(sc, ci); } private void recv(SocketChannel sc, ClientInfo ci) throws IOException { ci.channel.read(ci.inBuf); ByteBuffer tmpBuf = ci.inBuf.duplicate(); tmpBuf.flip(); int bytesProcessed = 0; boolean doneLoop = false; while (!doneLoop) { byte b; try { b = tmpBuf.get(); } catch (BufferUnderflowException bue) { // Processed all data in buffer ci.inBuf.clear(); doneLoop = true; break; } switch (b) { case TypeServerConstants.WELCOME: bytesProcessed++; break; case TypeServerConstants.GET_STRING_REQUEST: bytesProcessed++; if (ci.outputPending) { // Client is backed up. We can't append to // the byte buffer because it's in the wrong // state. We could allocate another buffer // here and change our send method to know // about multiple buffers, but we'll just // assume that the client is dead break; } ci.outBuf.put(TypeServerConstants.GET_STRING_RESPONSE); ByteBuffer strBuf = encoder.encode(testString); ci.outBuf.putShort((short) strBuf.remaining()); ci.outBuf.put(strBuf); ci.outBuf.flip(); send(sc, ci); break; case TypeServerConstants.GET_STRING_RESPONSE: int startPos = tmpBuf.position(); try { int nBytes = tmpBuf.getInt(); byte[] buf = new byte[nBytes]; tmpBuf.get(buf); bytesProcessed += buf.length + 5; String s = new String(buf); // Send the string to the GUI break; } catch (BufferUnderflowException bue) { // Processed all available data ci.inBuf.position(ci.inBuf.position() + bytesProcessed); doneLoop = true; } break; } } } private void send(SocketChannel sc, ClientInfo ci) throws IOException { int len = ci.outBuf.remaining(); int nBytes = sc.write(ci.outBuf); if (nBytes != len) { // Client not ready to receive data ci.outputPending = true; ci.channel.register(selector, SelectionKey.OP_READ | SelectionKey.OP_WRITE); } else { ci.outBuf.clear(); if (ci.outputPending) { ci.outputPending = false; ci.channel.register(selector, SelectionKey.OP_READ); } } } protected void registeredClient(SocketChannel sc) throws IOException { ClientInfo ci = new ClientInfo(); ci.channel = sc; ci.outBuf.clear(); ci.outBuf.put(TypeServerConstants.WELCOME); ci.outBuf.flip(); allClients.put(sc, ci); send(sc, ci); } public static void main(String[] args) throws Exception { TypeServer ts = new TypeServer(); ts.port = Integer.parseInt(args[0]); Thread t = new Thread(ts); t.start(); System.out.println("Type server ready...Type CTRL-D to exit"); while (System.in.read() > 0) ; ts.stopServer(); t.join(); } }
public static void main(String[] args) throws Throwable { final int itrs = Integer.getInteger("iterations", 100000); // final int itrs = Integer.getInteger("iterations", 12); final int size = Integer.getInteger("size", 2048); final int subsize = Integer.getInteger("subsize", 128); final int maxchar = Integer.getInteger("maxchar", 128); final String regex = System.getProperty("filter"); final Pattern filter = (regex == null) ? null : Pattern.compile(regex); final boolean useSecurityManager = Boolean.getBoolean("SecurityManager"); if (useSecurityManager) System.setSecurityManager(new PermissiveSecurityManger()); final Random rnd = new Random(); String[] csns = new String[] { "Big5", "Johab", "EUC_CN", "EUC_KR", "MS932", "MS936", "MS949", "MS950", "GBK", "Big5_HKSCS", "Big5_HKSCS_2001", "Big5_Solaris", "MS950_HKSCS", "MS950_HKSCS_XP", "IBM1364", "IBM1381", "IBM1383", "IBM930", "IBM933", "IBM935", "IBM937", "IBM939", "IBM942", "IBM943", "IBM948", "IBM949", "IBM950", "IBM970", }; ArrayList<long[]> sum = new ArrayList<>(); for (final String csn : csns) { final Charset cs = Charset.forName(csn); List<Integer> cps = new ArrayList<>(0x4000); int off = 0; int cp = 0; int n = 0; CharsetEncoder enc = cs.newEncoder(); while (cp < 0x10000 && n < cps.size()) { if (enc.canEncode((char) cp)) { cps.add(cp); n++; } cp++; } Collections.shuffle(cps); char[] ca = new char[cps.size()]; for (int i = 0; i < cps.size(); i++) ca[i] = (char) (int) cps.get(i); System.out.printf("%n--------%s---------%n", csn); for (int sz = 8; sz <= 2048; sz *= 2) { System.out.printf(" [len=%d]%n", sz); final char[] chars = Arrays.copyOf(ca, sz); final String str = new String(chars); final byte[] bs = str.getBytes(cs); Job[] jobs = { new Job("String decode: csn") { public void work() throws Throwable { for (int i = 0; i < itrs; i++) new String(bs, csn); } }, new Job("String decode: cs") { public void work() throws Throwable { for (int i = 0; i < itrs; i++) new String(bs, cs); } }, new Job("String encode: csn") { public void work() throws Throwable { for (int i = 0; i < itrs; i++) str.getBytes(csn); } }, new Job("String encode: cs") { public void work() throws Throwable { for (int i = 0; i < itrs; i++) str.getBytes(cs); } }, }; sum.add(time(jobs)); } } }
ByteToCharConverter(Charset charset, String encoding) { super(encoding); decoder = charset.newDecoder().onUnmappableCharacter(CodingErrorAction.REPLACE); // for compatibility to old ByteToCharASCII converter: if (charset.name().equals("US-ASCII")) decoder.onMalformedInput(CodingErrorAction.REPLACE); }
// check and compare canEncoding/Encoding static void checkDecoding(Charset oldCS, Charset newCS) throws Exception { System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name()); boolean isEBCDIC = oldCS.name().startsWith("x-IBM93"); // Try singlebyte first byte[] bb = new byte[1]; System.out.printf(" trying SB...%n"); for (int b = 0; b < 0x100; b++) { bb[0] = (byte) b; String sOld = new String(bb, oldCS); String sNew = new String(bb, newCS); if (!sOld.equals(sNew)) { System.out.printf( " b=%x: %x/%d(old) %x/%d(new)%n", b & 0xff, sOld.charAt(0) & 0xffff, sOld.length(), sNew.charAt(0) & 0xffff, sNew.length()); } } System.out.printf(" trying DB...%n"); bb = new byte[isEBCDIC ? 4 : 2]; int b1Min = 0x40; int b1Max = 0xfe; for (int b1 = 0x40; b1 < 0xff; b1++) { if (!isEBCDIC) { // decodable singlebyte b1 bb[0] = (byte) b1; String sOld = new String(bb, oldCS); String sNew = new String(bb, newCS); if (!sOld.equals(sNew)) { if (sOld.length() != 2 && sOld.charAt(0) != 0) { // only prints we are NOT expected. above two are known issue System.out.printf( " b1=%x: %x/%d(old) %x/%d(new)%n", b1 & 0xff, sOld.charAt(0) & 0xffff, sOld.length(), sNew.charAt(0) & 0xffff, sNew.length()); continue; } } } for (int b2 = 0x40; b2 < 0xff; b2++) { if (isEBCDIC) { bb[0] = 0x0e; bb[1] = (byte) b1; bb[2] = (byte) b2; bb[3] = 0x0f; } else { bb[0] = (byte) b1; bb[1] = (byte) b2; } String sOld = new String(bb, oldCS); String sNew = new String(bb, newCS); // if (!sOld.equals(sNew)) { if (sOld.charAt(0) != sNew.charAt(0)) { if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) continue; // known issude in old implementation System.out.printf( " bb=<%x,%x> c(old)=%x, c(new)=%x%n", b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff); } } } }
static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable { String bmpStr = new String(bmpCA); CharsetDecoder dec = cs.newDecoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); CharsetEncoder enc = cs.newEncoder() .onMalformedInput(CodingErrorAction.REPLACE) .onUnmappableCharacter(CodingErrorAction.REPLACE); // getBytes(csn); byte[] baSC = bmpStr.getBytes(cs.name()); ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA)); byte[] baNIO = new byte[bf.limit()]; bf.get(baNIO, 0, baNIO.length); if (!Arrays.equals(baSC, baNIO)) throw new RuntimeException("getBytes(csn) failed -> " + cs.name()); // getBytes(cs); baSC = bmpStr.getBytes(cs); if (!Arrays.equals(baSC, baNIO)) throw new RuntimeException("getBytes(cs) failed -> " + cs.name()); // new String(csn); String strSC = new String(sbBA, cs.name()); String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString(); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(csn) failed -> " + cs.name()); // new String(cs); strSC = new String(sbBA, cs); if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed -> " + cs.name()); // encode unmappable surrogates if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) { if (cs.name().equals("UTF-8") || // utf8 handles surrogates cs.name().equals("CESU-8")) // utf8 handles surrogates return; enc.replaceWith(new byte[] {(byte) 'A'}); sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc; String str = "ab\uD800\uDC00\uD800\uDC00cd"; byte[] ba = new byte[str.length() - 2]; int n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 6 || !"abAAcd".equals(new String(ba, cs.name()))) throw new RuntimeException("encode1(surrogates) failed -> " + cs.name()); ba = new byte[str.length()]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name()))) throw new RuntimeException("encode2(surrogates) failed -> " + cs.name()); str = "ab\uD800B\uDC00Bcd"; ba = new byte[str.length()]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name()))) throw new RuntimeException("encode3(surrogates) failed -> " + cs.name()); /* sun.nio.cs.ArrayDeEncoder works on the assumption that the invoker (StringCoder) allocates enough output buf, utf8 and double-byte coder does not check the output buffer limit. ba = new byte[str.length() - 1]; n = cae.encode(str.toCharArray(), 0, str.length(), ba); if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) { throw new RuntimeException("encode4(surrogates) failed -> " + cs.name()); } */ } }