public void run(String[] args) { Parameters params = validateAndParseParams(args, Parameters.class); File testData = new File(params.getCensusData()); File dictOutFile = new File(params.getDict()); CmdLineUtil.checkInputFile("Name data", testData); CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile); FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData); ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn, Charset.forName(params.getEncoding())); Dictionary mDictionary; try { System.out.println("Creating Dictionary..."); mDictionary = createDictionary(sampleStream); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while reading training data or indexing data: " + e.getMessage(), e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry this can fail.. } } System.out.println("Saving Dictionary..."); OutputStream out = null; try { out = new FileOutputStream(dictOutFile); mDictionary.serialize(out); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while writing dictionary file: " + e.getMessage(), e); } finally { if (out != null) try { out.close(); } catch (IOException e) { // file might be damaged throw new TerminateToolException( -1, "Attention: Failed to correctly write dictionary:" + e.getMessage(), e); } } }
/**
 * Builds a dictionary from every entry produced by the given stream.
 *
 * <p>The stream is read until it returns {@code null}. Each entry is added to the dictionary
 * unless the dictionary already contains it. The stream is not closed by this method.
 *
 * @param sampleStream the stream of name entries to collect
 * @return a {@code Dictionary} (created with {@code new Dictionary(true)}) holding the entries
 *     read from the stream
 * @throws IOException if reading from {@code sampleStream} fails
 */
public static Dictionary createDictionary(ObjectStream<StringList> sampleStream)
    throws IOException {
  Dictionary names = new Dictionary(true);

  for (StringList current = sampleStream.read();
      current != null;
      current = sampleStream.read()) {
    if (names.contains(current)) {
      // Already present; skip to the next entry.
      continue;
    }
    names.put(current);
  }

  return names;
}
/**
 * Parses a dictionary from text that holds one entry per line.
 *
 * <p>Each line is split into tokens on the space character ({@code " "}); the tokens of a line
 * form one {@code StringList} entry. Lines that yield no tokens are skipped. The supplied
 * reader is wrapped in a {@code BufferedReader} and is not closed by this method.
 *
 * @param in the reader supplying the dictionary text
 * @return the parsed dictionary
 * @throws IOException if reading a line from {@code in} fails
 */
public static Dictionary parseOneEntryPerLine(Reader in) throws IOException {
  Dictionary result = new Dictionary();
  BufferedReader reader = new BufferedReader(in);

  for (String row = reader.readLine(); row != null; row = reader.readLine()) {
    StringTokenizer tokenizer = new StringTokenizer(row, " ");
    int count = tokenizer.countTokens();
    if (count == 0) {
      // Nothing but delimiters (or an empty line): no entry to add.
      continue;
    }

    String[] parts = new String[count];
    int next = 0;
    while (tokenizer.hasMoreTokens()) {
      parts[next] = tokenizer.nextToken();
      next++;
    }
    result.put(new StringList(parts));
  }

  return result;
}