/**
 * Runs the document categorizer tool.
 * <p>
 * With no arguments the help text from {@code getHelp()} is printed. Otherwise
 * {@code args[0]} names the model file; documents are read from {@code System.in},
 * categorized, and each one is printed to {@code System.out} as a
 * {@code DocumentSample} carrying the best category.
 *
 * @param args the command line arguments; {@code args[0]} is the model file path.
 */
public void run(String[] args) {
  // Nothing to categorize without a model file: show the help text and stop.
  if (args.length == 0) {
    System.out.println(getHelp());
    return;
  }

  File modelFile = new File(args[0]);
  DoccatModel model = new DoccatModelLoader().load(modelFile);
  DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

  ObjectStream<String> documents = new ParagraphStream(
      new PlainTextByLineStream(new InputStreamReader(System.in)));

  PerformanceMonitor monitor = new PerformanceMonitor(System.err, "doc");
  monitor.start();

  try {
    for (String text = documents.read(); text != null; text = documents.read()) {
      double[] outcomes = categorizer.categorize(text);
      String bestCategory = categorizer.getBestCategory(outcomes);

      DocumentSample categorized = new DocumentSample(bestCategory, text);
      System.out.println(categorized.toString());

      // One counter tick per categorized document.
      monitor.incrementCounter();
    }
  }
  catch (IOException e) {
    CmdLineUtil.handleStdinIoError(e);
  }

  monitor.stopAndPrintFinalResult();
}
@Test public void testReadingEvents() throws IOException { StringBuilder sample = new StringBuilder(); // First sample sentence sample.append("word11 tag11 pred11"); sample.append('\n'); sample.append("word12 tag12 pred12"); sample.append('\n'); sample.append("word13 tag13 pred13"); sample.append('\n'); // Start next sample sentence sample.append('\n'); // Second sample sentence sample.append("word21 tag21 pred21"); sample.append('\n'); sample.append("word22 tag22 pred22"); sample.append('\n'); sample.append("word23 tag23 pred23"); sample.append('\n'); ObjectStream<String> stringStream = new PlainTextByLineStream(new StringReader(sample.toString())); ObjectStream<ChunkSample> chunkStream = new ChunkSampleStream(stringStream); // read first sample ChunkSample firstSample = chunkStream.read(); assertEquals("word11", firstSample.getSentence()[0]); assertEquals("tag11", firstSample.getTags()[0]); assertEquals("pred11", firstSample.getPreds()[0]); assertEquals("word12", firstSample.getSentence()[1]); assertEquals("tag12", firstSample.getTags()[1]); assertEquals("pred12", firstSample.getPreds()[1]); assertEquals("word13", firstSample.getSentence()[2]); assertEquals("tag13", firstSample.getTags()[2]); assertEquals("pred13", firstSample.getPreds()[2]); // read second sample ChunkSample secondSample = chunkStream.read(); assertEquals("word21", secondSample.getSentence()[0]); assertEquals("tag21", secondSample.getTags()[0]); assertEquals("pred21", secondSample.getPreds()[0]); assertEquals("word22", secondSample.getSentence()[1]); assertEquals("tag22", secondSample.getTags()[1]); assertEquals("pred22", secondSample.getPreds()[1]); assertEquals("word23", secondSample.getSentence()[2]); assertEquals("tag23", secondSample.getTags()[2]); assertEquals("pred23", secondSample.getPreds()[2]); assertNull(chunkStream.read()); }
/**
 * Evaluates a {@code DictionaryNameFinder}, built from the dictionary returned by
 * {@code createDictionary()}, against the stream returned by {@code createSample()}
 * and asserts that both the F-measure and the recall score equal 1.
 *
 * @throws IOException propagated from creating, evaluating or closing the sample stream
 * @throws URISyntaxException propagated from {@code createDictionary()} or {@code createSample()}
 */
@Test
public void testEvaluator() throws IOException, URISyntaxException {
  DictionaryNameFinder nameFinder = new DictionaryNameFinder(createDictionary());
  TokenNameFinderEvaluator evaluator =
      new TokenNameFinderEvaluator(nameFinder, new NameEvaluationErrorListener());

  ObjectStream<NameSample> sample = createSample();
  try {
    evaluator.evaluate(sample);
  }
  finally {
    // Close the stream even when the evaluation throws, so it is never leaked.
    sample.close();
  }

  FMeasure fmeasure = evaluator.getFMeasure();

  assertTrue(fmeasure.getFMeasure() == 1);
  assertTrue(fmeasure.getRecallScore() == 1);
}
public void run(String[] args) { Parameters params = validateAndParseParams(args, Parameters.class); File testData = new File(params.getCensusData()); File dictOutFile = new File(params.getDict()); CmdLineUtil.checkInputFile("Name data", testData); CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile); FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData); ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream(sampleDataIn, Charset.forName(params.getEncoding())); Dictionary mDictionary; try { System.out.println("Creating Dictionary..."); mDictionary = createDictionary(sampleStream); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while reading training data or indexing data: " + e.getMessage(), e); } finally { try { sampleStream.close(); } catch (IOException e) { // sorry this can fail.. } } System.out.println("Saving Dictionary..."); OutputStream out = null; try { out = new FileOutputStream(dictOutFile); mDictionary.serialize(out); } catch (IOException e) { throw new TerminateToolException( -1, "IO error while writing dictionary file: " + e.getMessage(), e); } finally { if (out != null) try { out.close(); } catch (IOException e) { // file might be damaged throw new TerminateToolException( -1, "Attention: Failed to correctly write dictionary:" + e.getMessage(), e); } } }
/**
 * Builds a name dictionary from the entries of a sample stream.
 * <p>
 * The stream is read until {@code read()} returns {@code null}; an entry is added
 * only when the dictionary does not already contain it. The stream is not closed here.
 *
 * @param sampleStream the stream of {@code StringList} entries to read.
 * @return the {@code Dictionary} holding the entries read from {@code sampleStream}.
 * @throws IOException if reading from {@code sampleStream} fails.
 */
public static Dictionary createDictionary(ObjectStream<StringList> sampleStream)
    throws IOException {

  Dictionary names = new Dictionary(true);

  StringList current;
  while ((current = sampleStream.read()) != null) {
    // Skip entries the dictionary already holds.
    if (names.contains(current)) {
      continue;
    }
    names.put(current);
  }

  return names;
}
/**
 * Creates a dictionary with all names from the sample data.
 * <p>
 * Every name span of every sample returned by {@code createSample()} is copied out
 * of the sample's sentence tokens and added to the dictionary as one entry.
 *
 * @return a dictionary holding the tokens of each name span found in the samples
 * @throws IOException if reading or closing the sample stream fails
 * @throws URISyntaxException propagated from {@code createSample()}
 */
private static Dictionary createDictionary() throws IOException, URISyntaxException {
  Dictionary dictionary = new Dictionary(true);

  ObjectStream<NameSample> sampleStream = createSample();
  try {
    NameSample sample;
    while ((sample = sampleStream.read()) != null) {
      Span[] names = sample.getNames();

      if (names != null && names.length > 0) {
        String[] toks = sample.getSentence();

        for (Span name : names) {
          // Copy exactly the tokens covered by the span: [start, start + length).
          String[] nameToks = new String[name.length()];
          System.arraycopy(toks, name.getStart(), nameToks, 0, name.length());
          dictionary.put(new StringList(nameToks));
        }
      }
    }
  }
  finally {
    // Close the stream even when reading fails, so it is never leaked.
    sampleStream.close();
  }

  return dictionary;
}
/**
 * Closes this stream by delegating to {@code adSentenceStream.close()}.
 *
 * @throws IOException if closing {@code adSentenceStream} fails.
 */
public void close() throws IOException { adSentenceStream.close(); }
/**
 * Resets this stream by delegating to {@code adSentenceStream.reset()}.
 *
 * @throws IOException if resetting {@code adSentenceStream} fails.
 * @throws UnsupportedOperationException presumably when {@code adSentenceStream}
 *         does not support being reset -- TODO confirm against its implementation.
 */
public void reset() throws IOException, UnsupportedOperationException { adSentenceStream.reset(); }