/**
 * Constructs a language guesser from a set of trigram resources.
 *
 * <p>For each URL, the language code is the last path segment with its
 * extension removed (for example {@code .../en.tri} yields {@code en}); the
 * code and the URL's opened stream are passed to {@code addFromStream}.
 * If the last path segment has no extension, the whole segment is used as
 * the code.
 *
 * @param urls locations of the trigram resources to load
 * @throws IOException if a URL cannot be opened, or if loading or closing
 *         its stream fails
 */
public TrigramLanguageGuesser(URL[] urls) throws IOException {
    tg = new TrigramGenerator();
    tg.addTrigramListener(test);
    for (int i = 0; i < urls.length; i++) {
        URL url = urls[i];
        String path = url.getPath();
        int fileNameStart = path.lastIndexOf('/') + 1;
        int fileNameEnd = path.lastIndexOf('.');
        // No '.' in the file name (either absent, or only present in a
        // parent directory name): treat the whole file name as the code
        // instead of failing inside substring().
        if (fileNameEnd < fileNameStart) {
            fileNameEnd = path.length();
        }
        String code = path.substring(fileNameStart, fileNameEnd);
        // Always release the stream, even when loading fails. Closing an
        // already-closed InputStream is harmless, so this is safe whether
        // or not addFromStream closes it itself.
        java.io.InputStream in = url.openStream();
        try {
            addFromStream(code, in);
        } finally {
            in.close();
        }
    }
}
/**
 * Constructs a LanguageGuesser from a directory of trigram files.
 *
 * <p>{@code fileLocation} is a directory containing {@code xx.tri} files,
 * where {@code xx} is the ISO-639 language code; see
 * http://ftp.ics.uci.edu/pub/ietf/http/related/iso639.txt
 *
 * <p>Every entry of the directory whose name ends in {@code .tri} is passed
 * to {@code addFile} using its absolute path.
 *
 * @param fileLocation path of the directory holding the {@code .tri} files
 * @throws IOException declared for the loading step ({@code addFile})
 * @throws RuntimeException if no trigram data is present after scanning,
 *         e.g. the location is not a directory, cannot be listed, or holds
 *         no {@code .tri} file
 */
public TrigramLanguageGuesser(String fileLocation) throws IOException {
    tg = new TrigramGenerator();
    tg.addTrigramListener(test);
    File f = new File(fileLocation);
    // File.list() returns null when the directory cannot be read, even if
    // isDirectory() just succeeded; treat that like an empty directory so
    // the descriptive exception below is raised instead of an NPE.
    String[] files = f.isDirectory() ? f.list() : null;
    if (files != null) {
        for (int i = 0; i < files.length; i++) {
            // Match the documented ".tri" extension, not any name that
            // merely happens to end in the letters "tri".
            if (files[i].endsWith(".tri")) {
                addFile(f.getAbsolutePath() + File.separator + files[i]);
            }
        }
    }
    if (trigramsmap.size() == 0) {
        throw new RuntimeException(
                "Location [ " + f.getAbsolutePath() + " ] doesn't contain any .tri file");
    }
}