/** * Write String using specified encoding * * <p>When this is called multiple times, all but the last value has a trailing null * * @param encoder * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeString(CharsetEncoder encoder, String next, int i, int noOfValues) throws CharacterCodingException { ByteBuffer bb; if ((i + 1) == noOfValues) { bb = encoder.encode(CharBuffer.wrap(next)); } else { bb = encoder.encode(CharBuffer.wrap(next + '\0')); } bb.rewind(); return bb; }
public static void main(String args[]) throws Exception { String inputFile = "samplein.txt"; String outputFile = "sampleout.txt"; RandomAccessFile inf = new RandomAccessFile(inputFile, "r"); RandomAccessFile outf = new RandomAccessFile(outputFile, "rw"); long inputLength = new File(inputFile).length(); FileChannel inc = inf.getChannel(); FileChannel outc = outf.getChannel(); MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength); Charset latin1 = Charset.forName("ISO-8859-1"); CharsetDecoder decoder = latin1.newDecoder(); CharsetEncoder encoder = latin1.newEncoder(); CharBuffer cb = decoder.decode(inputData); // Process char data here ByteBuffer outputData = encoder.encode(cb); outc.write(outputData); inf.close(); outf.close(); }
/** * Creates a string in a specfied character set. * * @param value String constant, must not be null * @param charsetName Name of the character set, may be null * @param collation Collation, may be null * @throws IllegalCharsetNameException If the given charset name is illegal * @throws UnsupportedCharsetException If no support for the named charset is available in this * instance of the Java virtual machine * @throws RuntimeException If the given value cannot be represented in the given charset */ public NlsString(String value, String charsetName, SqlCollation collation) { assert value != null; if (null != charsetName) { charsetName = charsetName.toUpperCase(); this.charsetName = charsetName; String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName); if (javaCharsetName == null) { throw new UnsupportedCharsetException(charsetName); } this.charset = Charset.forName(javaCharsetName); CharsetEncoder encoder = charset.newEncoder(); // dry run to see if encoding hits any problems try { encoder.encode(CharBuffer.wrap(value)); } catch (CharacterCodingException ex) { throw RESOURCE.charsetEncoding(value, javaCharsetName).ex(); } } else { this.charsetName = null; this.charset = null; } this.collation = collation; this.value = value; }
static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); String csn = cs.name(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } CoderResult cr = null; long t1 = System.nanoTime() / 1000; for (int i = 0; i < iteration; i++) { cbf.rewind(); bbf.clear(); enc.reset(); cr = enc.encode(cbf, bbf, true); } long t2 = System.nanoTime() / 1000; t.t = (t2 - t1) / iteration; if (cr != CoderResult.UNDERFLOW) { System.out.println("ENC-----------------"); int pos = cbf.position(); System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff); throw new RuntimeException("Encoding err: " + csn); } byte[] bb = new byte[bbf.position()]; bbf.flip(); bbf.get(bb); return bb; }
/** * Write String in UTF-BEBOM format * * <p>When this is called multiple times, all but the last value has a trailing null * * @param next * @param i * @param noOfValues * @return * @throws CharacterCodingException */ private ByteBuffer writeStringUTF16BEBOM(String next, int i, int noOfValues) throws CharacterCodingException { CharsetEncoder encoder = Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT).newEncoder(); encoder.onMalformedInput(CodingErrorAction.IGNORE); encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); ByteBuffer bb = null; // Add BOM if ((i + 1) == noOfValues) { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next)); } else { bb = encoder.encode(CharBuffer.wrap('\ufeff' + next + '\0')); } bb.rewind(); return bb; }
public static void main(String args[]) throws Exception { String s = "abc\uD800\uDC00qrst"; // Valid surrogate char[] c = s.toCharArray(); CharsetEncoder enc = Charset.forName("ISO8859_1").newEncoder().onUnmappableCharacter(CodingErrorAction.REPLACE); /* Process the first 4 characters, including the high surrogate which should be stored */ ByteBuffer bb = ByteBuffer.allocate(10); CharBuffer cb = CharBuffer.wrap(c); cb.limit(4); enc.encode(cb, bb, false); cb.limit(7); enc.encode(cb, bb, true); byte[] first = bb.array(); for (int i = 0; i < 7; i++) System.err.printf("[%d]=%d was %d\n", i, (int) first[i] & 0xffff, (int) c[i] & 0xffff); }
public static void main(String[] args) throws Exception { // 创建简体中文对应的Charset Charset cn = Charset.forName("GBK"); // 获取cn对象对应的编码器和解码器 CharsetEncoder cnEncoder = cn.newEncoder(); CharsetDecoder cnDecoder = cn.newDecoder(); // 创建一个CharBuffer对象 CharBuffer cbuff = CharBuffer.allocate(8); cbuff.put('孙'); cbuff.put('悟'); cbuff.put('空'); cbuff.flip(); // 将CharBuffer中的字符序列转换成字节序列 ByteBuffer bbuff = cnEncoder.encode(cbuff); // 循环访问ByteBuffer中的每个字节 for (int i = 0; i < bbuff.capacity(); i++) { System.out.print(bbuff.get(i) + " "); } // 将ByteBuffer的数据解码成字符序列 System.out.println("\n" + cnDecoder.decode(bbuff)); }
static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception { ByteBuffer bbf; CharBuffer cbf; CharsetEncoder enc = cs.newEncoder(); if (testDirect) { bbf = ByteBuffer.allocateDirect(cc.length * 4); cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer(); cbf.put(cc).flip(); } else { bbf = ByteBuffer.allocate(cc.length * 4); cbf = CharBuffer.wrap(cc); } CoderResult cr = null; for (int i = 0; i < iteration; i++) { cbf.rewind(); bbf.clear(); enc.reset(); cr = enc.encode(cbf, bbf, true); } return cr; }
@Override public void visitFile(PsiFile file) { super.visitFile(file); if (InjectedLanguageManager.getInstance(file.getProject()).isInjectedFragment(file) || !file.isPhysical()) { return; } final VirtualFile virtualFile = file.getVirtualFile(); final String text = file.getText(); final Charset charset = LoadTextUtil.extractCharsetFromFileContent(file.getProject(), virtualFile, text); final CharsetEncoder encoder = charset.newEncoder().onUnmappableCharacter(CodingErrorAction.REPORT); final CharBuffer charBuffer = CharBuffer.allocate(1); final ByteBuffer byteBuffer = ByteBuffer.allocate(10); final int length = text.length(); for (int i = 0; i < length; i++) { final char c = text.charAt(i); if (c != '\\') { continue; } boolean isEscape = true; int previousChar = i - 1; while (previousChar >= 0 && text.charAt(previousChar) == '\\') { isEscape = !isEscape; previousChar--; } if (!isEscape) { continue; } int nextChar = i; do { nextChar++; if (nextChar >= length) { break; } } while (text.charAt(nextChar) == 'u'); // \uuuu0061 is a legal unicode escape if (nextChar == i + 1 || nextChar + 3 >= length) { continue; } if (StringUtil.isHexDigit(text.charAt(nextChar)) && StringUtil.isHexDigit(text.charAt(nextChar + 1)) && StringUtil.isHexDigit(text.charAt(nextChar + 2)) && StringUtil.isHexDigit(text.charAt(nextChar + 3))) { final int escapeEnd = nextChar + 4; final char d = (char) Integer.parseInt(text.substring(nextChar, escapeEnd), 16); if (Character.isISOControl(d)) { continue; } byteBuffer.clear(); charBuffer.clear(); charBuffer.put(d).rewind(); final CoderResult coderResult = encoder.encode(charBuffer, byteBuffer, true); if (!coderResult.isUnmappable()) { final PsiElement element = file.findElementAt(i); if (element != null && isSuppressedFor(element)) { return; } registerErrorAtOffset(file, i, escapeEnd - i, Character.valueOf(d)); } } } }