/**
   * Encodes one value of a multi-value field with the supplied encoder.
   *
   * <p>Every value except the last one (the value for which {@code i + 1 == noOfValues}) is
   * encoded with a trailing null character appended.
   *
   * @param encoder encoder used to turn the text into bytes
   * @param next text of the value to encode
   * @param i zero-based index of this value
   * @param noOfValues total number of values being written
   * @return buffer holding the encoded bytes, positioned at its start
   * @throws CharacterCodingException if the encoder reports a coding error
   */
  private ByteBuffer writeString(CharsetEncoder encoder, String next, int i, int noOfValues)
      throws CharacterCodingException {
    final boolean lastValue = noOfValues == (i + 1);
    // Only values that are followed by another value carry the null terminator.
    final String text = lastValue ? next : next + '\0';
    final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
    encoded.rewind();
    return encoded;
  }
Esempio n. 2
0
  /**
   * Reads {@code samplein.txt}, decodes it as ISO-8859-1, re-encodes the characters with the same
   * charset and writes the bytes to the start of {@code sampleout.txt}.
   *
   * <p>NOTE(review): the output file is opened in {@code "rw"} mode and is not truncated, so bytes
   * of a longer pre-existing {@code sampleout.txt} remain after the written data - confirm whether
   * that is intended.
   *
   * @param args ignored
   * @throws Exception if a file cannot be opened, mapped, decoded, encoded or written
   */
  public static void main(String args[]) throws Exception {
    String inputFile = "samplein.txt";
    String outputFile = "sampleout.txt";

    // try-with-resources closes both files (and therefore their channels) even when mapping,
    // decoding, encoding or writing throws.
    try (RandomAccessFile inf = new RandomAccessFile(inputFile, "r");
        RandomAccessFile outf = new RandomAccessFile(outputFile, "rw")) {
      FileChannel inc = inf.getChannel();
      FileChannel outc = outf.getChannel();

      // Ask the open channel for its size so the mapped length matches the file being read.
      MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inc.size());

      Charset latin1 = Charset.forName("ISO-8859-1");
      CharsetDecoder decoder = latin1.newDecoder();
      CharsetEncoder encoder = latin1.newEncoder();

      CharBuffer cb = decoder.decode(inputData);

      // Process char data here

      ByteBuffer outputData = encoder.encode(cb);

      // A single write() is allowed to consume only part of the buffer; loop until it is drained.
      while (outputData.hasRemaining()) {
        outc.write(outputData);
      }
    }
  }
Esempio n. 3
0
  /**
   * Creates a string in a specified character set.
   *
   * <p>When a charset name is given it is upper-cased, translated to a Java charset name, and the
   * value is trial-encoded with that charset so that unrepresentable values are rejected here.
   *
   * @param value String constant, must not be null
   * @param charsetName Name of the character set, may be null
   * @param collation Collation, may be null
   * @throws IllegalCharsetNameException If the given charset name is illegal
   * @throws UnsupportedCharsetException If no support for the named charset is available in this
   *     instance of the Java virtual machine
   * @throws RuntimeException If the given value cannot be represented in the given charset
   */
  public NlsString(String value, String charsetName, SqlCollation collation) {
    assert value != null;
    if (null != charsetName) {
      // Upper-case with Locale.ROOT: the no-argument overload follows the default locale, which
      // (for example under a Turkish locale) maps 'i' to a dotted capital I and corrupts names
      // such as "iso-8859-1" before they are looked up.
      charsetName = charsetName.toUpperCase(java.util.Locale.ROOT);
      this.charsetName = charsetName;
      String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName);
      if (javaCharsetName == null) {
        throw new UnsupportedCharsetException(charsetName);
      }
      this.charset = Charset.forName(javaCharsetName);
      CharsetEncoder encoder = charset.newEncoder();

      // dry run to see if encoding hits any problems
      try {
        encoder.encode(CharBuffer.wrap(value));
      } catch (CharacterCodingException ex) {
        throw RESOURCE.charsetEncoding(value, javaCharsetName).ex();
      }
    } else {
      this.charsetName = null;
      this.charset = null;
    }
    this.collation = collation;
    this.value = value;
  }
Esempio n. 4
0
 /**
  * Encodes {@code cc} with a new encoder of {@code cs}, repeating the pass {@code iteration}
  * times, and returns the bytes produced by the final pass.
  *
  * <p>The elapsed time is sampled with {@link System#nanoTime()} divided by 1000 (microseconds)
  * and the per-pass average is stored in {@code t.t}.
  *
  * @param cc characters to encode
  * @param cs charset whose encoder is exercised
  * @param testDirect {@code true} to use direct buffers, {@code false} to use heap buffers
  * @param t receives the average duration of one pass
  * @return the encoded bytes
  * @throws Exception if the last pass did not finish with {@link CoderResult#UNDERFLOW}
  */
 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
   final CharsetEncoder encoder = cs.newEncoder();
   final String charsetName = cs.name();

   final ByteBuffer target;
   final CharBuffer source;
   if (!testDirect) {
     target = ByteBuffer.allocate(cc.length * 4);
     source = CharBuffer.wrap(cc);
   } else {
     target = ByteBuffer.allocateDirect(cc.length * 4);
     source = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     source.put(cc).flip();
   }

   CoderResult lastResult = null;
   final long startMicros = System.nanoTime() / 1000;
   int pass = 0;
   while (pass < iteration) {
     // Every pass starts from scratch: same input, empty output, encoder in its initial state.
     source.rewind();
     target.clear();
     encoder.reset();
     lastResult = encoder.encode(source, target, true);
     pass++;
   }
   final long endMicros = System.nanoTime() / 1000;
   t.t = (endMicros - startMicros) / iteration;

   if (lastResult != CoderResult.UNDERFLOW) {
     System.out.println("ENC-----------------");
     final int failedAt = source.position();
     System.out.printf(
         "  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
         lastResult.toString(), failedAt, cc[failedAt] & 0xffff);
     throw new RuntimeException("Encoding err: " + charsetName);
   }

   // After flip() the remaining byte count equals the number of bytes that were written.
   target.flip();
   final byte[] encoded = new byte[target.remaining()];
   target.get(encoded);
   return encoded;
 }
  /**
   * Encodes one value of a multi-value field, prefixed with a byte order mark (U+FEFF), using the
   * charset named by {@code TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT}.
   *
   * <p>Every value except the last one (the value for which {@code i + 1 == noOfValues}) is
   * encoded with a trailing null character appended. Malformed input and unmappable characters
   * are ignored by the encoder rather than reported.
   *
   * @param next text of the value to encode
   * @param i zero-based index of this value
   * @param noOfValues total number of values being written
   * @return buffer holding the encoded bytes, positioned at its start
   * @throws CharacterCodingException declared by the encoder's {@code encode} call
   */
  private ByteBuffer writeStringUTF16BEBOM(String next, int i, int noOfValues)
      throws CharacterCodingException {
    final CharsetEncoder bomEncoder =
        Charset.forName(TextEncoding.CHARSET_UTF_16_BE_ENCODING_FORMAT).newEncoder();
    bomEncoder.onMalformedInput(CodingErrorAction.IGNORE);
    bomEncoder.onUnmappableCharacter(CodingErrorAction.IGNORE);

    // The byte order mark always leads the value.
    String text = '\ufeff' + next;
    if ((i + 1) != noOfValues) {
      // Not the last value: terminate it with a null character.
      text = text + '\0';
    }

    final ByteBuffer encoded = bomEncoder.encode(CharBuffer.wrap(text));
    encoded.rewind();
    return encoded;
  }
Esempio n. 6
0
 /**
  * Encodes a string containing a valid surrogate pair with an ISO-8859-1 encoder that replaces
  * unmappable characters, feeding the input in two steps so that the first step ends on the high
  * surrogate, then prints the first seven output bytes next to the input chars on standard error.
  *
  * @param args ignored
  * @throws Exception declared for parity with the original entry point
  */
 public static void main(String args[]) throws Exception {
   final char[] input = "abc\uD800\uDC00qrst".toCharArray(); // Valid surrogate
   final CharsetEncoder encoder = Charset.forName("ISO8859_1").newEncoder();
   encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);

   final ByteBuffer output = ByteBuffer.allocate(10);
   final CharBuffer pending = CharBuffer.wrap(input);

   // Step one: the first 4 characters, ending with the high surrogate, and more input to come.
   pending.limit(4);
   encoder.encode(pending, output, false);

   // Step two: extend the window to 7 characters and signal the end of the input.
   pending.limit(7);
   encoder.encode(pending, output, true);

   final byte[] encoded = output.array();
   int index = 0;
   while (index < 7) {
     System.err.printf(
         "[%d]=%d was %d\n", index, (int) encoded[index] & 0xffff, (int) input[index] & 0xffff);
     index++;
   }
 }
 /**
  * Encodes three Chinese characters with the GBK charset, prints every encoded byte, then decodes
  * the bytes back into characters and prints the resulting text.
  *
  * @param args ignored
  * @throws Exception if encoding or decoding fails
  */
 public static void main(String[] args) throws Exception {
   // Create the Charset for Simplified Chinese
   Charset cn = Charset.forName("GBK");
   // Obtain the encoder and decoder that belong to the cn charset
   CharsetEncoder cnEncoder = cn.newEncoder();
   CharsetDecoder cnDecoder = cn.newDecoder();
   // Create a CharBuffer and fill it with the characters to encode
   CharBuffer cbuff = CharBuffer.allocate(8);
   cbuff.put('孙');
   cbuff.put('悟');
   cbuff.put('空');
   cbuff.flip();
   // Convert the character sequence in the CharBuffer into a byte sequence
   ByteBuffer bbuff = cnEncoder.encode(cbuff);
   // Visit every encoded byte. Only indices below limit() hold encoded data: the buffer returned
   // by encode() may have a larger capacity, and an absolute get() at or beyond the limit throws
   // IndexOutOfBoundsException.
   for (int i = 0; i < bbuff.limit(); i++) {
     System.out.print(bbuff.get(i) + " ");
   }
   // Decode the bytes in the ByteBuffer back into a character sequence
   System.out.println("\n" + cnDecoder.decode(bbuff));
 }
Esempio n. 8
0
 /**
  * Encodes {@code cc} with a new encoder of {@code cs}, repeating the pass {@code iteration}
  * times, and returns the result of the final pass.
  *
  * @param cc characters to encode
  * @param cs charset whose encoder is exercised
  * @param testDirect {@code true} to use direct buffers, {@code false} to use heap buffers
  * @return the {@link CoderResult} of the last pass, or {@code null} when no pass was run
  * @throws Exception declared for parity with the sibling helpers
  */
 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception {
   final CharsetEncoder encoder = cs.newEncoder();

   final ByteBuffer target;
   final CharBuffer source;
   if (!testDirect) {
     target = ByteBuffer.allocate(cc.length * 4);
     source = CharBuffer.wrap(cc);
   } else {
     target = ByteBuffer.allocateDirect(cc.length * 4);
     source = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     source.put(cc).flip();
   }

   CoderResult lastResult = null;
   int pass = 0;
   while (pass < iteration) {
     // Every pass starts from scratch: same input, empty output, encoder in its initial state.
     source.rewind();
     target.clear();
     encoder.reset();
     lastResult = encoder.encode(source, target, true);
     pass++;
   }
   return lastResult;
 }
 /**
  * Scans the text of a physical, non-injected file for unicode escapes (a backslash followed by
  * one or more {@code u} characters and four hex digits) and registers an error for every escape
  * whose character is not an ISO control character and is not reported as unmappable by the
  * encoder of the charset extracted from the file content.
  *
  * <p>The scan stops completely at the first such escape that is located in an element for which
  * the inspection is suppressed.
  *
  * @param file the file to inspect
  */
 @Override
 public void visitFile(PsiFile file) {
   super.visitFile(file);
   final boolean injected =
       InjectedLanguageManager.getInstance(file.getProject()).isInjectedFragment(file);
   if (injected || !file.isPhysical()) {
     return;
   }
   final VirtualFile sourceFile = file.getVirtualFile();
   final String source = file.getText();
   final Charset fileCharset =
       LoadTextUtil.extractCharsetFromFileContent(file.getProject(), sourceFile, source);
   final CharsetEncoder encoder =
       fileCharset.newEncoder().onUnmappableCharacter(CodingErrorAction.REPORT);
   final CharBuffer charBuffer = CharBuffer.allocate(1);
   final ByteBuffer byteBuffer = ByteBuffer.allocate(10);
   final int end = source.length();
   for (int slash = 0; slash < end; slash++) {
     if (source.charAt(slash) != '\\') {
       continue;
     }
     // A backslash preceded by an odd number of backslashes is itself escaped.
     int backslashesBefore = 0;
     for (int k = slash - 1; k >= 0 && source.charAt(k) == '\\'; k--) {
       backslashesBefore++;
     }
     if (backslashesBefore % 2 != 0) {
       continue;
     }
     // Skip the run of 'u' characters; any number of them is allowed in a unicode escape.
     int hexStart = slash + 1;
     while (hexStart < end && source.charAt(hexStart) == 'u') {
       hexStart++;
     }
     final boolean noMarker = hexStart == slash + 1;
     if (noMarker || hexStart + 3 >= end) {
       continue;
     }
     final boolean fourHexDigits =
         StringUtil.isHexDigit(source.charAt(hexStart))
             && StringUtil.isHexDigit(source.charAt(hexStart + 1))
             && StringUtil.isHexDigit(source.charAt(hexStart + 2))
             && StringUtil.isHexDigit(source.charAt(hexStart + 3));
     if (!fourHexDigits) {
       continue;
     }
     final int escapeEnd = hexStart + 4;
     final char decoded = (char) Integer.parseInt(source.substring(hexStart, escapeEnd), 16);
     if (Character.isISOControl(decoded)) {
       continue;
     }
     // Try to encode the single decoded character with the file's charset.
     byteBuffer.clear();
     charBuffer.clear();
     charBuffer.put(decoded).rewind();
     final CoderResult outcome = encoder.encode(charBuffer, byteBuffer, true);
     if (outcome.isUnmappable()) {
       continue;
     }
     final PsiElement element = file.findElementAt(slash);
     if (element != null && isSuppressedFor(element)) {
       return;
     }
     registerErrorAtOffset(file, slash, escapeEnd - slash, Character.valueOf(decoded));
   }
 }