/** * auto-detect the charset of a file used code from http://jchardet.sourceforge.net/; see also: * http://www-archive.mozilla.org/projects/intl/chardet.html * * @param file * @return a set of probable charsets * @throws IOException */ public static Set<String> detectCharset(File file) throws IOException { // auto-detect charset, used code from http://jchardet.sourceforge.net/; see also: // http://www-archive.mozilla.org/projects/intl/chardet.html nsDetector det = new nsDetector(nsPSMDetector.ALL); BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file)); byte[] buf = new byte[1024]; int len; boolean done = false; boolean isAscii = true; while ((len = imp.read(buf, 0, buf.length)) != -1) { if (isAscii) isAscii = det.isAscii(buf, len); if (!isAscii && !done) done = det.DoIt(buf, len, false); } det.DataEnd(); Set<String> result = new HashSet<>(); if (isAscii) { result.add("ASCII"); } else { for (String c : det.getProbableCharsets()) result.add(c); } return result; }