/**
 * Decode file charset.
 * <p>
 * Reads up to the first 4096 bytes of the file and returns the first charset that decodes this
 * sample without reporting malformed or unmappable input. Candidates are tried in this order: the
 * JVM default charset, US-ASCII, UTF-8, UTF-16BE, UTF-16LE, and then every other available charset
 * in the order of {@link Charset#availableCharsets()}. If no candidate decodes the sample, the JVM
 * default charset is returned.
 *
 * @param f File to process.
 * @return File charset.
 * @throws IOException in case of error.
 */
public static Charset decode(File f) throws IOException {
    SortedMap<String, Charset> charsets = Charset.availableCharsets();

    String[] firstCharsets = {
        Charset.defaultCharset().name(), "US-ASCII", "UTF-8", "UTF-16BE", "UTF-16LE"
    };

    // Insertion-ordered set: preferred charsets first, repeats from the full list are ignored.
    Collection<Charset> orderedCharsets = new java.util.LinkedHashSet<>(charsets.size());

    for (String c : firstCharsets) {
        Charset preferred = charsets.get(c);

        if (preferred != null) {
            orderedCharsets.add(preferred);
        }
    }

    orderedCharsets.addAll(charsets.values());

    try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
        FileChannel ch = raf.getChannel();

        ByteBuffer buf = ByteBuffer.allocate(4096);

        // A single read() is not guaranteed to fill the buffer; read until it is full or EOF.
        while (buf.hasRemaining() && ch.read(buf) > 0) {
            // No-op: the loop condition performs the read.
        }

        buf.flip();

        // When the file is longer than the sample, the last character may be cut in half and
        // must not be treated as malformed input.
        boolean wholeFile = ch.size() <= buf.limit();

        for (Charset charset : orderedCharsets) {
            // A failed attempt leaves the position at the offending byte; every candidate has
            // to be checked against the sample from its beginning.
            buf.rewind();

            if (decodesCleanly(charset, buf, wholeFile)) {
                return charset;
            }
        }
    }

    return Charset.defaultCharset();
}

/**
 * Checks whether the remaining bytes of the sample decode with the given charset without errors.
 *
 * @param charset Candidate charset.
 * @param sample Bytes to decode; the buffer position is advanced by this call.
 * @param complete {@code true} if the sample is the entire input, so that an incomplete trailing
 *      byte sequence counts as an error; {@code false} if more bytes follow the sample.
 * @return {@code true} if neither malformed nor unmappable input was reported.
 */
private static boolean decodesCleanly(Charset charset, ByteBuffer sample, boolean complete) {
    // A new decoder reports malformed and unmappable input instead of replacing it.
    CharsetDecoder decoder = charset.newDecoder();

    // Sized from maxCharsPerByte so that the output cannot overflow for this sample.
    int cap = (int) Math.ceil(sample.remaining() * (double) decoder.maxCharsPerByte()) + 1;

    java.nio.CharBuffer out = java.nio.CharBuffer.allocate(cap);

    if (decoder.decode(sample, out, complete).isError()) {
        return false;
    }

    // flush() is only legal after a decode call that signalled end of input.
    return !complete || !decoder.flush(out).isError();
}
/**
 * Loads the stories from {@code datasets/<name>.tsv}.
 * <p>
 * Every line of the file is converted with {@code Story.fromtext} and added to {@code stories}.
 * The file is read with the platform default charset. If the file cannot be read, an error
 * naming the file and the cause is printed to standard error and the JVM exits with status 1.
 */
private void parsestories() {
    String path = "datasets/" + name + ".tsv";

    try {
        List<String> lns = Files.readAllLines(Paths.get(path), Charset.defaultCharset());

        for (String ln : lns) {
            stories.add(Story.fromtext(ln));
        }
    } catch (IOException e) {
        // Diagnostics go to stderr and include the file and the cause so the failure is traceable.
        System.err.println("Error reading dataset " + path + ": " + e);
        System.exit(1);
    }
}
/**
 * Returns the connection charset name.
 * <p>
 * When no charset has been set, the name of {@link Charset#defaultCharset()} is returned instead.
 *
 * @return charset
 */
public String getCharset() {
    if (charset != null) {
        return charset;
    }

    return Charset.defaultCharset().name();
}