Ejemplo n.º 1
0
  /**
   * Creates a string in a specfied character set.
   *
   * @param value String constant, must not be null
   * @param charsetName Name of the character set, may be null
   * @param collation Collation, may be null
   * @throws IllegalCharsetNameException If the given charset name is illegal
   * @throws UnsupportedCharsetException If no support for the named charset is available in this
   *     instance of the Java virtual machine
   * @throws RuntimeException If the given value cannot be represented in the given charset
   */
  public NlsString(String value, String charsetName, SqlCollation collation) {
    assert value != null;
    if (null != charsetName) {
      charsetName = charsetName.toUpperCase();
      this.charsetName = charsetName;
      String javaCharsetName = SqlUtil.translateCharacterSetName(charsetName);
      if (javaCharsetName == null) {
        throw new UnsupportedCharsetException(charsetName);
      }
      this.charset = Charset.forName(javaCharsetName);
      CharsetEncoder encoder = charset.newEncoder();

      // dry run to see if encoding hits any problems
      try {
        encoder.encode(CharBuffer.wrap(value));
      } catch (CharacterCodingException ex) {
        throw RESOURCE.charsetEncoding(value, javaCharsetName).ex();
      }
    } else {
      this.charsetName = null;
      this.charset = null;
    }
    this.collation = collation;
    this.value = value;
  }
Ejemplo n.º 2
0
 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   String csn = cs.name();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("ENC-----------------");
     int pos = cbf.position();
     System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n", cr.toString(), pos, cc[pos] & 0xffff);
     throw new RuntimeException("Encoding err: " + csn);
   }
   byte[] bb = new byte[bbf.position()];
   bbf.flip();
   bbf.get(bb);
   return bb;
 }
Ejemplo n.º 3
0
 public static void main(String[] arguments) {
   try {
     // read byte data into a byte buffer
     String data = "friends.dat";
     FileInputStream inData = new FileInputStream(data);
     FileChannel inChannel = inData.getChannel();
     long inSize = inChannel.size();
     ByteBuffer source = ByteBuffer.allocate((int) inSize);
     inChannel.read(source, 0);
     source.position(0);
     System.out.println("Original byte data:");
     for (int i = 0; source.remaining() > 0; i++) {
       System.out.print(source.get() + " ");
     }
     // convert byte data into character data
     source.position(0);
     Charset ascii = Charset.forName("US-ASCII");
     CharsetDecoder toAscii = ascii.newDecoder();
     CharBuffer destination = toAscii.decode(source);
     destination.position(0);
     System.out.println("\n\nNew character data:");
     for (int i = 0; destination.remaining() > 0; i++) {
       System.out.print(destination.get());
     }
     System.out.println();
   } catch (FileNotFoundException fne) {
     System.out.println(fne.getMessage());
   } catch (IOException ioe) {
     System.out.println(ioe.getMessage());
   }
 }
Ejemplo n.º 4
0
  public static void main(String args[]) throws Exception {
    String inputFile = "samplein.txt";
    String outputFile = "sampleout.txt";

    RandomAccessFile inf = new RandomAccessFile(inputFile, "r");
    RandomAccessFile outf = new RandomAccessFile(outputFile, "rw");
    long inputLength = new File(inputFile).length();

    FileChannel inc = inf.getChannel();
    FileChannel outc = outf.getChannel();

    MappedByteBuffer inputData = inc.map(FileChannel.MapMode.READ_ONLY, 0, inputLength);

    Charset latin1 = Charset.forName("ISO-8859-1");
    CharsetDecoder decoder = latin1.newDecoder();
    CharsetEncoder encoder = latin1.newEncoder();

    CharBuffer cb = decoder.decode(inputData);

    // Process char data here

    ByteBuffer outputData = encoder.encode(cb);

    outc.write(outputData);

    inf.close();
    outf.close();
  }
Ejemplo n.º 5
0
  // check and compare canEncoding/Encoding
  static char[] checkEncoding(Charset oldCS, Charset newCS) throws Exception {
    System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
    CharsetEncoder encOLD = oldCS.newEncoder();
    CharsetEncoder encNew = newCS.newEncoder();
    char[] cc = new char[0x10000];
    int pos = 0;
    boolean is970 = "x-IBM970-Old".equals(oldCS.name());

    for (char c = 0; c < 0xffff; c++) {
      boolean canOld = encOLD.canEncode(c);
      boolean canNew = encNew.canEncode(c);

      if (is970 && c == 0x2299) continue;

      if (canOld != canNew) {
        if (canNew) {
          System.out.printf("      NEW(only): ");
          printEntry(c, newCS);
        } else {
          if (is970) {
            byte[] bb = new String(new char[] {c}).getBytes(oldCS);
            if (bb.length == 2 && bb[0] == (byte) 0xa2 && bb[1] == (byte) 0xc1) {
              // we know 970 has bogus nnnn -> a2c1 -> 2299
              continue;
            }
          }
          System.out.printf("      OLD(only): ");
          printEntry(c, oldCS);
        }
      } else if (canNew) {
        byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
        byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
        if (!Arrays.equals(bbNew, bbOld)) {
          System.out.printf("      c->b NEW: ");
          printEntry(c, newCS);
          System.out.printf("      c->b OLD: ");
          printEntry(c, oldCS);
        } else {
          String sNew = new String(bbNew, newCS);
          String sOld = new String(bbOld, oldCS);
          if (!sNew.equals(sOld)) {
            System.out.printf("      b2c NEW (c=%x):", c & 0xffff);
            printEntry(sNew.charAt(0), newCS);
            System.out.printf("      b2c OLD:");
            printEntry(sOld.charAt(0), oldCS);
          }
        }
      }
      if (canNew & canOld) { // added only both for now
        cc[pos++] = c;
      }
    }
    return Arrays.copyOf(cc, pos);
  }
Ejemplo n.º 6
0
  static void checkUnderOverflow(Charset cs) throws Exception {
    String csn = cs.name();
    System.out.printf("Check under/overflow <%s>...%n", csn);
    CharsetDecoder dec = cs.newDecoder();
    boolean failed = false;

    // 7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
    // 0   1 2   3         7         11
    byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
    int inlen = bytes.length;

    int MAXOFF = 20;
    for (int inoff = 0; inoff < MAXOFF; inoff++) {
      for (int outoff = 0; outoff < MAXOFF; outoff++) {
        int[][] Flows = {
          // inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
          // overflow
          {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
          {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
          {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
          {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
          {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
          {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},
          // underflow
          {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
          {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
          {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
          {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
          {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
          {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
          {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
          {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
          {inoff, 11, outoff, 6, inoff + 11, outoff + 4, 0},
          {inoff, 12, outoff, 6, inoff + 11, outoff + 4, 0},
          {inoff, 15, outoff, 6, inoff + 15, outoff + 6, 0},
          // 2-byte under/overflow
          {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
          {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
          {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
        };
        for (boolean direct : new boolean[] {false, true}) {
          for (int[] flow : Flows) {
            if (!check(dec, bytes, direct, flow)) failed = true;
          }
        }
      }
    }
    if (failed) throw new RuntimeException("Check under/overflow failed " + csn);
  }
Ejemplo n.º 7
0
  static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
    System.gc(); // enqueue finalizable objects
    Thread.sleep(1000);
    System.gc(); // enqueue finalizable objects

    String csn1 = cs1.name();
    String csn2 = cs2.name();
    System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);

    Time t1 = new Time();
    Time t2 = new Time();

    byte[] bb1 = encode(cc, cs1, false, t1);
    byte[] bb2 = encode(cc, cs2, false, t2);

    System.out.printf(
        "    Encoding TimeRatio %s/%s: %d,%d :%f%n",
        csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t));
    if (!Arrays.equals(bb1, bb2)) {
      System.out.printf("        encoding failed%n");
    }

    char[] cc2 = decode(bb1, cs2, false, t2);
    char[] cc1 = decode(bb1, cs1, false, t1);
    System.out.printf(
        "    Decoding TimeRatio %s/%s: %d,%d :%f%n",
        csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t));
    if (!Arrays.equals(cc1, cc2)) {
      System.out.printf("        decoding failed%n");
    }

    bb1 = encode(cc, cs1, true, t1);
    bb2 = encode(cc, cs2, true, t2);

    System.out.printf(
        "    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
        csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t));

    if (!Arrays.equals(bb1, bb2)) System.out.printf("        encoding (direct) failed%n");

    cc1 = decode(bb1, cs1, true, t1);
    cc2 = decode(bb1, cs2, true, t2);
    System.out.printf(
        "    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
        csn2, csn1, t2.t, t1.t, (double) (t2.t) / (t1.t));
    if (!Arrays.equals(cc1, cc2)) {
      System.out.printf("        decoding (direct) failed%n");
    }
  }
Ejemplo n.º 8
0
  // Check if DatagramChannel.send while connected can include
  // address without throwing
  private static void test1() throws Exception {

    DatagramChannel sndChannel = DatagramChannel.open();
    sndChannel.socket().bind(null);
    InetSocketAddress sender =
        new InetSocketAddress(InetAddress.getLocalHost(), sndChannel.socket().getLocalPort());

    DatagramChannel rcvChannel = DatagramChannel.open();
    rcvChannel.socket().bind(null);
    InetSocketAddress receiver =
        new InetSocketAddress(InetAddress.getLocalHost(), rcvChannel.socket().getLocalPort());

    rcvChannel.connect(sender);
    sndChannel.connect(receiver);

    ByteBuffer bb = ByteBuffer.allocate(256);
    bb.put("hello".getBytes());
    bb.flip();
    int sent = sndChannel.send(bb, receiver);
    bb.clear();
    rcvChannel.receive(bb);
    bb.flip();
    CharBuffer cb = Charset.forName("US-ASCII").newDecoder().decode(bb);
    if (!cb.toString().startsWith("h")) throw new RuntimeException("Test failed");

    rcvChannel.close();
    sndChannel.close();
  }
Ejemplo n.º 9
0
  // Check if the datagramsocket adaptor can send with a packet
  // that has not been initialized with an address; the legacy
  // datagram socket will send in this case
  private static void test2() throws Exception {
    DatagramChannel sndChannel = DatagramChannel.open();
    sndChannel.socket().bind(null);
    InetSocketAddress sender =
        new InetSocketAddress(InetAddress.getLocalHost(), sndChannel.socket().getLocalPort());

    DatagramChannel rcvChannel = DatagramChannel.open();
    rcvChannel.socket().bind(null);
    InetSocketAddress receiver =
        new InetSocketAddress(InetAddress.getLocalHost(), rcvChannel.socket().getLocalPort());

    rcvChannel.connect(sender);
    sndChannel.connect(receiver);

    byte b[] = "hello".getBytes("UTF-8");
    DatagramPacket pkt = new DatagramPacket(b, b.length);
    sndChannel.socket().send(pkt);

    ByteBuffer bb = ByteBuffer.allocate(256);
    rcvChannel.receive(bb);
    bb.flip();
    CharBuffer cb = Charset.forName("US-ASCII").newDecoder().decode(bb);
    if (!cb.toString().startsWith("h")) throw new RuntimeException("Test failed");

    // Check that the pkt got set with the target address;
    // This is legacy behavior
    if (!pkt.getSocketAddress().equals(receiver)) throw new RuntimeException("Test failed");

    rcvChannel.close();
    sndChannel.close();
  }
Ejemplo n.º 10
0
 /**
  * Constructor.
  *
  * @param enc encoding
  * @throws IOException I/O exception
  */
 private Generic(final String enc) throws IOException {
   try {
     csd = Charset.forName(enc).newDecoder();
   } catch (final Exception ex) {
     throw new EncodingException(ex);
   }
 }
Ejemplo n.º 11
0
  public static void main(String[] args) throws Exception {
    for (String csname : csnames) {
      System.out.printf("-----------------------------------%n");
      String oldname = csname + "_OLD";
      checkInit(csname);
      Charset csOld = (Charset) Class.forName(oldname).newInstance();
      Charset csNew = Charset.forName(csname);
      char[] cc = checkEncoding(csOld, csNew);
      checkDecoding(csOld, csNew);
      compare(csNew, csOld, cc);

      if (csname.startsWith("x-IBM93")) {
        // ecdbic
        checkMalformed(
            csNew,
            new byte[][] {
              {1, 0x26, 0x0f, 0x27}, // in SBSC, no SI
              {1, 0x0e, 0x41, 0x41, 0xe}, // in DBSC, no SO
              {2, 0x0e, 0x40, 0x41, 0xe}, // illegal DB
            });
      } else if (csname.equals("x-IBM970") || csname.equals("x-IBM1383")) {
        // euc_simple
        checkMalformed(
            csNew,
            new byte[][] {
              {1, 0x26, (byte) 0x8f, 0x27}, // SS2
              {1, (byte) 0xa1, (byte) 0xa1, (byte) 0x8e, 0x51}, // SS3
            });
      }
    }
  }
Ejemplo n.º 12
0
 static void checkMalformed(Charset cs, byte[][] malformed) throws Exception {
   boolean failed = false;
   String csn = cs.name();
   System.out.printf("Check malformed <%s>...%n", csn);
   for (boolean direct : new boolean[] {false, true}) {
     for (byte[] bins : malformed) {
       int mlen = bins[0];
       byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
       CoderResult cr = decodeCR(bin, cs, direct);
       String ashex = "";
       for (int i = 0; i < bin.length; i++) {
         if (i > 0) ashex += " ";
         ashex += Integer.toString((int) bin[i] & 0xff, 16);
       }
       if (!cr.isMalformed()) {
         System.out.printf(
             "        FAIL(direct=%b): [%s] not malformed. -->cr=%s\n",
             direct, ashex, cr.toString());
         failed = true;
       } else if (cr.length() != mlen) {
         System.out.printf(
             "        FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
         failed = true;
       }
     }
   }
   if (failed) throw new RuntimeException("Check malformed failed " + csn);
 }
Ejemplo n.º 13
0
 EncodingConverter(String encoding) {
   Name = encoding;
   myDecoder =
       Charset.forName(encoding)
           .newDecoder()
           .onMalformedInput(CodingErrorAction.REPLACE)
           .onUnmappableCharacter(CodingErrorAction.REPLACE);
 }
Ejemplo n.º 14
0
  static void checkInit(String csn) throws Exception {
    System.out.printf("Check init <%s>...%n", csn);
    Charset.forName("Big5"); // load in the ExtendedCharsets
    long t1 = System.nanoTime() / 1000;
    Charset cs = Charset.forName(csn);
    long t2 = System.nanoTime() / 1000;
    System.out.printf("    charset     :%d%n", t2 - t1);
    t1 = System.nanoTime() / 1000;
    cs.newDecoder();
    t2 = System.nanoTime() / 1000;
    System.out.printf("    new Decoder :%d%n", t2 - t1);

    t1 = System.nanoTime() / 1000;
    cs.newEncoder();
    t2 = System.nanoTime() / 1000;
    System.out.printf("    new Encoder :%d%n", t2 - t1);
  }
Ejemplo n.º 15
0
 private void recv(SocketChannel sc, ClientInfo ci) throws IOException {
   ci.channel.read(ci.inBuf);
   ByteBuffer tmpBuf = ci.inBuf.duplicate();
   tmpBuf.flip();
   int bytesProcessed = 0;
   boolean doneLoop = false;
   while (!doneLoop) {
     byte b;
     try {
       b = tmpBuf.get();
     } catch (BufferUnderflowException bue) {
       // Processed all data in buffer
       ci.inBuf.clear();
       doneLoop = true;
       break;
     }
     switch (b) {
       case TypeServerConstants.WELCOME:
         bytesProcessed++;
         break;
       case TypeServerConstants.GET_STRING_REQUEST:
         bytesProcessed++;
         if (ci.outputPending) {
           // Client is backed up. We can't append to
           // the byte buffer because it's in the wrong
           // state. We could allocate another buffer
           // here and change our send method to know
           // about multiple buffers, but we'll just
           // assume that the client is dead
           break;
         }
         ci.outBuf.put(TypeServerConstants.GET_STRING_RESPONSE);
         ByteBuffer strBuf = encoder.encode(testString);
         ci.outBuf.putShort((short) strBuf.remaining());
         ci.outBuf.put(strBuf);
         ci.outBuf.flip();
         send(sc, ci);
         break;
       case TypeServerConstants.GET_STRING_RESPONSE:
         int startPos = tmpBuf.position();
         try {
           int nBytes = tmpBuf.getInt();
           byte[] buf = new byte[nBytes];
           tmpBuf.get(buf);
           bytesProcessed += buf.length + 5;
           String s = new String(buf);
           // Send the string to the GUI
           break;
         } catch (BufferUnderflowException bue) {
           // Processed all available data
           ci.inBuf.position(ci.inBuf.position() + bytesProcessed);
           doneLoop = true;
         }
         break;
     }
   }
 }
Ejemplo n.º 16
0
 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t) throws Exception {
   String csn = cs.name();
   CharsetDecoder dec = cs.newDecoder();
   ByteBuffer bbf;
   CharBuffer cbf;
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(bb.length);
     cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     bbf.put(bb);
   } else {
     bbf = ByteBuffer.wrap(bb);
     cbf = CharBuffer.allocate(bb.length);
   }
   CoderResult cr = null;
   long t1 = System.nanoTime() / 1000;
   for (int i = 0; i < iteration; i++) {
     bbf.rewind();
     cbf.clear();
     dec.reset();
     cr = dec.decode(bbf, cbf, true);
   }
   long t2 = System.nanoTime() / 1000;
   t.t = (t2 - t1) / iteration;
   if (cr != CoderResult.UNDERFLOW) {
     System.out.println("DEC-----------------");
     int pos = bbf.position();
     System.out.printf(
         "  cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
         cr.toString(),
         pos,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff,
         bb[pos++] & 0xff);
     throw new RuntimeException("Decoding err: " + csn);
   }
   char[] cc = new char[cbf.position()];
   cbf.flip();
   cbf.get(cc);
   return cc;
 }
 public static void main(String[] args) throws Exception {
   // 创建简体中文对应的Charset
   Charset cn = Charset.forName("GBK");
   // 获取cn对象对应的编码器和解码器
   CharsetEncoder cnEncoder = cn.newEncoder();
   CharsetDecoder cnDecoder = cn.newDecoder();
   // 创建一个CharBuffer对象
   CharBuffer cbuff = CharBuffer.allocate(8);
   cbuff.put('孙');
   cbuff.put('悟');
   cbuff.put('空');
   cbuff.flip();
   // 将CharBuffer中的字符序列转换成字节序列
   ByteBuffer bbuff = cnEncoder.encode(cbuff);
   // 循环访问ByteBuffer中的每个字节
   for (int i = 0; i < bbuff.capacity(); i++) {
     System.out.print(bbuff.get(i) + " ");
   }
   // 将ByteBuffer的数据解码成字符序列
   System.out.println("\n" + cnDecoder.decode(bbuff));
 }
Ejemplo n.º 18
0
  /**
   * Get text content of a source file.
   *
   * @param path The canonical path of source file.
   * @param charset Source file encoding.
   * @return Source file content.
   */
  public String read(String path, String charset) {
    String str = null;
    byte[] bin = read(path);

    try {
      str = Charset.forName(charset).newDecoder().decode(ByteBuffer.wrap(bin)).toString();
    } catch (CharacterCodingException e) {
      App.exit("Cannot read " + path + " as " + charset + " encoded file");
    }

    return str;
  }
Ejemplo n.º 19
0
  public static void main(String[] args) throws Throwable {

    for (Boolean hasSM : new boolean[] {false, true}) {
      if (hasSM) System.setSecurityManager(new PermissiveSecurityManger());
      for (Charset cs : Charset.availableCharsets().values()) {
        if ("ISO-2022-CN".equals(cs.name())
            || "x-COMPOUND_TEXT".equals(cs.name())
            || "x-JISAutoDetect".equals(cs.name())) continue;
        System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM);
        // full bmp first
        char[] bmpCA = new char[0x10000];
        for (int i = 0; i < 0x10000; i++) {
          bmpCA[i] = (char) i;
        }
        byte[] sbBA = new byte[0x100];
        for (int i = 0; i < 0x100; i++) {
          sbBA[i] = (byte) i;
        }
        test(cs, bmpCA, sbBA);
        // "randomed" sizes
        Random rnd = new Random();
        for (int i = 0; i < 10; i++) {
          int clen = rnd.nextInt(0x10000);
          int blen = rnd.nextInt(0x100);
          // System.out.printf("    blen=%d, clen=%d%n", blen, clen);
          test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
          // add a pair of surrogates
          int pos = clen / 2;
          if ((pos + 1) < blen) {
            bmpCA[pos] = '\uD800';
            bmpCA[pos + 1] = '\uDC00';
          }
          test(cs, Arrays.copyOf(bmpCA, clen), Arrays.copyOf(sbBA, blen));
        }

        testMixed(cs);
        System.out.println("done!");
      }
    }
  }
Ejemplo n.º 20
0
  /**
   * Decode file charset.
   *
   * @param f File to process.
   * @return File charset.
   * @throws IOException in case of error.
   */
  public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {
      Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets)
      if (charsets.containsKey(c)) orderedCharsets.add(charsets.get(c));

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
      FileChannel ch = raf.getChannel();

      ByteBuffer buf = ByteBuffer.allocate(4096);

      ch.read(buf);

      buf.flip();

      for (Charset charset : orderedCharsets) {
        CharsetDecoder decoder = charset.newDecoder();

        decoder.reset();

        try {
          decoder.decode(buf);

          return charset;
        } catch (CharacterCodingException ignored) {
        }
      }
    }

    return Charset.defaultCharset();
  }
Ejemplo n.º 21
0
  /**
   * Check if the first X characters of a byte stream match a String.
   *
   * @param data The byte array to process
   * @param pattern The String to match
   * @return True if the pattern was found, false otherwise
   */
  private static boolean bytesEqualsString(byte[] data, String pattern) {
    byte[] bytes = new byte[pattern.length()];
    Charset csets = Charset.forName("US-ASCII");
    boolean fin = false;
    int currChar = 0;

    // remove any CR and/or LF characters at the beginning of the article
    // data
    while (!fin) {
      if (currChar >= data.length) break;

      byte in = data[currChar];
      ByteBuffer bb = ByteBuffer.wrap(new byte[] {(byte) in});
      CharBuffer cb = csets.decode(bb);
      char c = cb.charAt(0);

      if (data.length > 0 && (c == '\n' || c == '\r')) currChar++;
      else fin = true;

      if (data.length == 0) fin = true;
    }

    // extract bytes (chars) to check from article data
    for (int i = 0; i < bytes.length && i < data.length; i++, currChar++) {
      byte in = data[currChar];
      bytes[i] = (byte) in;
    }

    // decode byte data to characters
    ByteBuffer bb = ByteBuffer.wrap(bytes);
    CharBuffer cb = csets.decode(bb);

    // compare these characters to the pattern String
    for (int i = 0; i < pattern.length(); i++) if (cb.charAt(i) != pattern.charAt(i)) return false;

    return true;
  }
Ejemplo n.º 22
0
 public static void main(String args[]) throws Exception {
   String s = "abc\uD800\uDC00qrst"; // Valid surrogate
   char[] c = s.toCharArray();
   CharsetEncoder enc =
       Charset.forName("ISO8859_1").newEncoder().onUnmappableCharacter(CodingErrorAction.REPLACE);
   /* Process the first 4 characters, including the high surrogate
   which should be stored */
   ByteBuffer bb = ByteBuffer.allocate(10);
   CharBuffer cb = CharBuffer.wrap(c);
   cb.limit(4);
   enc.encode(cb, bb, false);
   cb.limit(7);
   enc.encode(cb, bb, true);
   byte[] first = bb.array();
   for (int i = 0; i < 7; i++)
     System.err.printf("[%d]=%d was %d\n", i, (int) first[i] & 0xffff, (int) c[i] & 0xffff);
 }
Ejemplo n.º 23
0
  static void testMixed(Charset cs) throws Throwable {
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    List<Integer> cps = new ArrayList<>(0x10000);
    int off = 0;
    int cp = 0;
    while (cp < 0x10000) {
      if (enc.canEncode((char) cp)) {
        cps.add(cp);
      }
      cp++;
    }
    Collections.shuffle(cps);
    char[] bmpCA = new char[cps.size()];
    for (int i = 0; i < cps.size(); i++) bmpCA[i] = (char) (int) cps.get(i);
    String bmpStr = new String(bmpCA);
    // getBytes(csn);
    byte[] bmpBA = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(bmpBA, baNIO)) {
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
    }

    // getBytes(cs);
    bmpBA = bmpStr.getBytes(cs);
    if (!Arrays.equals(bmpBA, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(bmpBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
    if (!strNIO.equals(strSC)) {
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());
    }

    // new String(cs);
    strSC = new String(bmpBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());
  }
Ejemplo n.º 24
0
 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect) throws Exception {
   ByteBuffer bbf;
   CharBuffer cbf;
   CharsetEncoder enc = cs.newEncoder();
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(cc.length * 4);
     cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
     cbf.put(cc).flip();
   } else {
     bbf = ByteBuffer.allocate(cc.length * 4);
     cbf = CharBuffer.wrap(cc);
   }
   CoderResult cr = null;
   for (int i = 0; i < iteration; i++) {
     cbf.rewind();
     bbf.clear();
     enc.reset();
     cr = enc.encode(cbf, bbf, true);
   }
   return cr;
 }
Ejemplo n.º 25
0
 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect) throws Exception {
   CharsetDecoder dec = cs.newDecoder();
   ByteBuffer bbf;
   CharBuffer cbf;
   if (testDirect) {
     bbf = ByteBuffer.allocateDirect(bb.length);
     cbf = ByteBuffer.allocateDirect(bb.length * 2).asCharBuffer();
     bbf.put(bb).flip();
   } else {
     bbf = ByteBuffer.wrap(bb);
     cbf = CharBuffer.allocate(bb.length);
   }
   CoderResult cr = null;
   for (int i = 0; i < iteration; i++) {
     bbf.rewind();
     cbf.clear();
     dec.reset();
     cr = dec.decode(bbf, cbf, true);
   }
   return cr;
 }
Ejemplo n.º 26
0
// 感觉如果没有看过 NIO 的详细内容是不会懂这部分
public class TypeServer extends TCPNIOServer {
  static String testString = "Thisisateststring";

  static class ClientInfo {
    ByteBuffer inBuf = ByteBuffer.allocateDirect(512);
    ByteBuffer outBuf = ByteBuffer.allocateDirect(512);
    boolean outputPending = false;
    SocketChannel channel;
  }

  Map allClients = new HashMap();
  Charset encoder = Charset.forName("UTF-8");

  protected void handleClient(SelectionKey key) throws IOException {
    SocketChannel sc = (SocketChannel) key.channel();
    ClientInfo ci = (ClientInfo) allClients.get(sc);
    if (ci == null) throw new IllegalStateException("Unknown client");
    if (key.isWritable()) send(sc, ci);
    if (key.isReadable())
      // 从一个客户端读进所有的可用数据
      recv(sc, ci);
  }

  private void recv(SocketChannel sc, ClientInfo ci) throws IOException {
    ci.channel.read(ci.inBuf);
    ByteBuffer tmpBuf = ci.inBuf.duplicate();
    tmpBuf.flip();
    int bytesProcessed = 0;
    boolean doneLoop = false;
    while (!doneLoop) {
      byte b;
      try {
        b = tmpBuf.get();
      } catch (BufferUnderflowException bue) {
        // Processed all data in buffer
        ci.inBuf.clear();
        doneLoop = true;
        break;
      }
      switch (b) {
        case TypeServerConstants.WELCOME:
          bytesProcessed++;
          break;
        case TypeServerConstants.GET_STRING_REQUEST:
          bytesProcessed++;
          if (ci.outputPending) {
            // Client is backed up. We can't append to
            // the byte buffer because it's in the wrong
            // state. We could allocate another buffer
            // here and change our send method to know
            // about multiple buffers, but we'll just
            // assume that the client is dead
            break;
          }
          ci.outBuf.put(TypeServerConstants.GET_STRING_RESPONSE);
          ByteBuffer strBuf = encoder.encode(testString);
          ci.outBuf.putShort((short) strBuf.remaining());
          ci.outBuf.put(strBuf);
          ci.outBuf.flip();
          send(sc, ci);
          break;
        case TypeServerConstants.GET_STRING_RESPONSE:
          int startPos = tmpBuf.position();
          try {
            int nBytes = tmpBuf.getInt();
            byte[] buf = new byte[nBytes];
            tmpBuf.get(buf);
            bytesProcessed += buf.length + 5;
            String s = new String(buf);
            // Send the string to the GUI
            break;
          } catch (BufferUnderflowException bue) {
            // Processed all available data
            ci.inBuf.position(ci.inBuf.position() + bytesProcessed);
            doneLoop = true;
          }
          break;
      }
    }
  }

  private void send(SocketChannel sc, ClientInfo ci) throws IOException {
    int len = ci.outBuf.remaining();
    int nBytes = sc.write(ci.outBuf);
    if (nBytes != len) {
      // Client not ready to receive data
      ci.outputPending = true;
      ci.channel.register(selector, SelectionKey.OP_READ | SelectionKey.OP_WRITE);
    } else {
      ci.outBuf.clear();
      if (ci.outputPending) {
        ci.outputPending = false;
        ci.channel.register(selector, SelectionKey.OP_READ);
      }
    }
  }

  protected void registeredClient(SocketChannel sc) throws IOException {
    ClientInfo ci = new ClientInfo();
    ci.channel = sc;
    ci.outBuf.clear();
    ci.outBuf.put(TypeServerConstants.WELCOME);
    ci.outBuf.flip();
    allClients.put(sc, ci);
    send(sc, ci);
  }

  public static void main(String[] args) throws Exception {
    TypeServer ts = new TypeServer();
    ts.port = Integer.parseInt(args[0]);
    Thread t = new Thread(ts);
    t.start();
    System.out.println("Type server ready...Type CTRL-D to exit");
    while (System.in.read() > 0) ;
    ts.stopServer();
    t.join();
  }
}
Ejemplo n.º 27
0
  public static void main(String[] args) throws Throwable {
    final int itrs = Integer.getInteger("iterations", 100000);
    // final int itrs = Integer.getInteger("iterations", 12);
    final int size = Integer.getInteger("size", 2048);
    final int subsize = Integer.getInteger("subsize", 128);
    final int maxchar = Integer.getInteger("maxchar", 128);
    final String regex = System.getProperty("filter");
    final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
    final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
    if (useSecurityManager) System.setSecurityManager(new PermissiveSecurityManger());
    final Random rnd = new Random();

    String[] csns =
        new String[] {
          "Big5",
          "Johab",
          "EUC_CN",
          "EUC_KR",
          "MS932",
          "MS936",
          "MS949",
          "MS950",
          "GBK",
          "Big5_HKSCS",
          "Big5_HKSCS_2001",
          "Big5_Solaris",
          "MS950_HKSCS",
          "MS950_HKSCS_XP",
          "IBM1364",
          "IBM1381",
          "IBM1383",
          "IBM930",
          "IBM933",
          "IBM935",
          "IBM937",
          "IBM939",
          "IBM942",
          "IBM943",
          "IBM948",
          "IBM949",
          "IBM950",
          "IBM970",
        };

    ArrayList<long[]> sum = new ArrayList<>();

    for (final String csn : csns) {
      final Charset cs = Charset.forName(csn);
      List<Integer> cps = new ArrayList<>(0x4000);
      int off = 0;
      int cp = 0;
      int n = 0;
      CharsetEncoder enc = cs.newEncoder();
      while (cp < 0x10000 && n < cps.size()) {
        if (enc.canEncode((char) cp)) {
          cps.add(cp);
          n++;
        }
        cp++;
      }
      Collections.shuffle(cps);
      char[] ca = new char[cps.size()];
      for (int i = 0; i < cps.size(); i++) ca[i] = (char) (int) cps.get(i);

      System.out.printf("%n--------%s---------%n", csn);
      for (int sz = 8; sz <= 2048; sz *= 2) {
        System.out.printf("   [len=%d]%n", sz);

        final char[] chars = Arrays.copyOf(ca, sz);
        final String str = new String(chars);
        final byte[] bs = str.getBytes(cs);

        Job[] jobs = {
          new Job("String decode: csn") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) new String(bs, csn);
            }
          },
          new Job("String decode: cs") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) new String(bs, cs);
            }
          },
          new Job("String encode: csn") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) str.getBytes(csn);
            }
          },
          new Job("String encode: cs") {
            public void work() throws Throwable {
              for (int i = 0; i < itrs; i++) str.getBytes(cs);
            }
          },
        };
        sum.add(time(jobs));
      }
    }
  }
 ByteToCharConverter(Charset charset, String encoding) {
   super(encoding);
   decoder = charset.newDecoder().onUnmappableCharacter(CodingErrorAction.REPLACE);
   // for compatibility to old ByteToCharASCII converter:
   if (charset.name().equals("US-ASCII")) decoder.onMalformedInput(CodingErrorAction.REPLACE);
 }
Ejemplo n.º 29
0
  // check and compare canEncoding/Encoding
  static void checkDecoding(Charset oldCS, Charset newCS) throws Exception {
    System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
    boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

    // Try singlebyte first
    byte[] bb = new byte[1];
    System.out.printf("       trying SB...%n");
    for (int b = 0; b < 0x100; b++) {
      bb[0] = (byte) b;
      String sOld = new String(bb, oldCS);
      String sNew = new String(bb, newCS);
      if (!sOld.equals(sNew)) {
        System.out.printf(
            "        b=%x:  %x/%d(old)  %x/%d(new)%n",
            b & 0xff,
            sOld.charAt(0) & 0xffff,
            sOld.length(),
            sNew.charAt(0) & 0xffff,
            sNew.length());
      }
    }

    System.out.printf("       trying DB...%n");
    bb = new byte[isEBCDIC ? 4 : 2];
    int b1Min = 0x40;
    int b1Max = 0xfe;
    for (int b1 = 0x40; b1 < 0xff; b1++) {
      if (!isEBCDIC) {
        // decodable singlebyte b1
        bb[0] = (byte) b1;
        String sOld = new String(bb, oldCS);
        String sNew = new String(bb, newCS);
        if (!sOld.equals(sNew)) {
          if (sOld.length() != 2 && sOld.charAt(0) != 0) {
            // only prints we are NOT expected. above two are known issue
            System.out.printf(
                "        b1=%x:  %x/%d(old)  %x/%d(new)%n",
                b1 & 0xff,
                sOld.charAt(0) & 0xffff,
                sOld.length(),
                sNew.charAt(0) & 0xffff,
                sNew.length());
            continue;
          }
        }
      }
      for (int b2 = 0x40; b2 < 0xff; b2++) {
        if (isEBCDIC) {
          bb[0] = 0x0e;
          bb[1] = (byte) b1;
          bb[2] = (byte) b2;
          bb[3] = 0x0f;
        } else {
          bb[0] = (byte) b1;
          bb[1] = (byte) b2;
        }
        String sOld = new String(bb, oldCS);
        String sNew = new String(bb, newCS);
        // if (!sOld.equals(sNew)) {
        if (sOld.charAt(0) != sNew.charAt(0)) {

          if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd)
            continue; // known issude in old implementation

          System.out.printf(
              "        bb=<%x,%x>  c(old)=%x,  c(new)=%x%n",
              b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
        }
      }
    }
  }
Ejemplo n.º 30
0
  static void test(Charset cs, char[] bmpCA, byte[] sbBA) throws Throwable {
    String bmpStr = new String(bmpCA);
    CharsetDecoder dec =
        cs.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    CharsetEncoder enc =
        cs.newEncoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    // getBytes(csn);
    byte[] baSC = bmpStr.getBytes(cs.name());
    ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
    byte[] baNIO = new byte[bf.limit()];
    bf.get(baNIO, 0, baNIO.length);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());

    // getBytes(cs);
    baSC = bmpStr.getBytes(cs);
    if (!Arrays.equals(baSC, baNIO))
      throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());

    // new String(csn);
    String strSC = new String(sbBA, cs.name());
    String strNIO = dec.reset().decode(ByteBuffer.wrap(sbBA)).toString();

    if (!strNIO.equals(strSC))
      throw new RuntimeException("new String(csn) failed  -> " + cs.name());

    // new String(cs);
    strSC = new String(sbBA, cs);
    if (!strNIO.equals(strSC)) throw new RuntimeException("new String(cs) failed  -> " + cs.name());

    // encode unmappable surrogates
    if (enc instanceof sun.nio.cs.ArrayEncoder && cs.contains(Charset.forName("ASCII"))) {
      if (cs.name().equals("UTF-8")
          || // utf8 handles surrogates
          cs.name().equals("CESU-8")) // utf8 handles surrogates
      return;
      enc.replaceWith(new byte[] {(byte) 'A'});
      sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder) enc;

      String str = "ab\uD800\uDC00\uD800\uDC00cd";
      byte[] ba = new byte[str.length() - 2];
      int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
        throw new RuntimeException("encode1(surrogates) failed  -> " + cs.name());

      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode2(surrogates) failed  -> " + cs.name());
      str = "ab\uD800B\uDC00Bcd";
      ba = new byte[str.length()];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, cs.name())))
        throw new RuntimeException("encode3(surrogates) failed  -> " + cs.name());
      /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
         invoker (StringCoder) allocates enough output buf, utf8
         and double-byte coder does not check the output buffer limit.
      ba = new byte[str.length() - 1];
      n = cae.encode(str.toCharArray(), 0, str.length(), ba);
      if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
          throw new RuntimeException("encode4(surrogates) failed  -> "
                                     + cs.name());
      }
      */
    }
  }