/**
   * Detects a charset able to decode the beginning of the given file.
   *
   * Up to the first 4096 bytes of the file are sampled. Candidate charsets are tried one by one:
   * the platform default, US-ASCII, UTF-8, UTF-16BE and UTF-16LE are queued first, followed by
   * every other charset reported by {@link Charset#availableCharsets()}. The first charset whose
   * decoder accepts the whole sample without a coding error is returned.
   *
   * Note: the sample is decoded as if it were the complete input, so a multi-byte sequence cut
   * at the 4096-byte boundary may cause an otherwise valid charset to be rejected.
   *
   * @param f File to process.
   * @return First candidate charset that decodes the sample, or the platform default charset if
   *     no candidate does.
   * @throws IOException in case of error.
   */
  public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    // Likely candidates, queued ahead of the rest of the installed charsets.
    String[] firstCharsets = {
      Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    // Presumably an insertion-ordered set (project helper), so preferred charsets stay first
    // and are not repeated when all charsets are appended below.
    Collection<Charset> orderedCharsets = U.newLinkedHashSet(charsets.size());

    for (String c : firstCharsets) {
      Charset preferred = charsets.get(c);

      if (preferred != null) {
        orderedCharsets.add(preferred);
      }
    }

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
      FileChannel ch = raf.getChannel();

      ByteBuffer buf = ByteBuffer.allocate(4096);

      // A single read may return fewer bytes than requested; keep reading until the sample
      // buffer is full or the end of the file is reached.
      while (buf.hasRemaining()) {
        if (ch.read(buf) < 0) {
          break;
        }
      }

      buf.flip();

      for (Charset charset : orderedCharsets) {
        CharsetDecoder decoder = charset.newDecoder();

        try {
          // Decoding advances the position of the buffer it is given, even when it fails.
          // Each attempt therefore works on a duplicate so that every charset is checked
          // against the full sample rather than the leftover of a previous attempt.
          decoder.decode(buf.duplicate());

          return charset;
        } catch (CharacterCodingException ignored) {
          // This charset cannot decode the sample; try the next candidate.
        }
      }
    }

    return Charset.defaultCharset();
  }
Exemple #2
0
  /**
   * Loads the dataset file {@code "datasets/" + name + ".tsv"} and adds one story per line.
   *
   * Every line of the file is read using the platform default charset, converted with
   * {@code Story.fromtext} and appended to {@code stories}. If the file cannot be read, an error
   * message is printed to standard output and the JVM exits with status 1.
   */
  private void parsestories() {
    String datasetFile = "datasets/" + name + ".tsv";

    try {
      List<String> rows = Files.readAllLines(Paths.get(datasetFile), Charset.defaultCharset());

      for (String row : rows) {
        stories.add(Story.fromtext(row));
      }
    } catch (IOException e) {
      System.out.println("Error reading dataset.");
      System.exit(1);
    }
  }
Exemple #3
0
 /**
  * Gets the name of the connection charset.
  *
  * @return the configured charset name, or the name of {@link Charset#defaultCharset()} when no
  *     charset has been set (the field is {@code null})
  */
 public String getCharset()
 {
   if (charset != null)
   {
     return charset;
   }

   // Nothing configured: fall back to the platform default charset name.
   return Charset.defaultCharset().name();
 }