Exemple #1
0
  /**
   * Categorizes documents read from standard input and prints each one as a
   * {@code DocumentSample} labelled with its best category.
   *
   * <p>When no arguments are given, only the help text is printed. Otherwise the model is
   * loaded from the file named by {@code args[0]}, documents are read from {@code System.in}
   * through a {@code ParagraphStream}, and throughput is reported to {@code System.err} by a
   * {@code PerformanceMonitor}.
   *
   * @param args command line arguments; {@code args[0]} is the path of the model file
   */
  public void run(String[] args) {

    // Nothing to do without a model file: show the usage text and leave.
    if (args.length == 0) {
      System.out.println(getHelp());
      return;
    }

    File modelFile = new File(args[0]);
    DoccatModel model = new DoccatModelLoader().load(modelFile);

    DocumentCategorizerME categorizer = new DocumentCategorizerME(model);

    InputStreamReader stdin = new InputStreamReader(System.in);
    ObjectStream<String> documents = new ParagraphStream(new PlainTextByLineStream(stdin));

    PerformanceMonitor monitor = new PerformanceMonitor(System.err, "doc");
    monitor.start();

    try {
      // read() returning null marks the end of the input.
      for (String text = documents.read(); text != null; text = documents.read()) {
        double[] outcomes = categorizer.categorize(text);
        String best = categorizer.getBestCategory(outcomes);

        DocumentSample labelled = new DocumentSample(best, text);
        System.out.println(labelled.toString());

        monitor.incrementCounter();
      }
    } catch (IOException e) {
      CmdLineUtil.handleStdinIoError(e);
    }

    monitor.stopAndPrintFinalResult();
  }
  /**
   * Feeds two blank-line separated sentences of "word tag pred" lines through a
   * {@code ChunkSampleStream} and checks that each sentence comes back as one
   * {@code ChunkSample} with matching sentence, tag and pred arrays, followed by
   * {@code null} once the input is used up.
   */
  @Test
  public void testReadingEvents() throws IOException {

    // Two sentences of three lines each; the empty line starts the next sample.
    String text =
        "word11 tag11 pred11\n"
            + "word12 tag12 pred12\n"
            + "word13 tag13 pred13\n"
            + "\n"
            + "word21 tag21 pred21\n"
            + "word22 tag22 pred22\n"
            + "word23 tag23 pred23\n";

    ObjectStream<String> lines = new PlainTextByLineStream(new StringReader(text));
    ObjectStream<ChunkSample> samples = new ChunkSampleStream(lines);

    for (int sentence = 1; sentence <= 2; sentence++) {
      ChunkSample current = samples.read();

      for (int token = 1; token <= 3; token++) {
        // Fixture values are named <kind><sentence><token>, e.g. "word12".
        String suffix = "" + sentence + token;
        int index = token - 1;

        assertEquals("word" + suffix, current.getSentence()[index]);
        assertEquals("tag" + suffix, current.getTags()[index]);
        assertEquals("pred" + suffix, current.getPreds()[index]);
      }
    }

    // No third sample exists.
    assertNull(samples.read());
  }
  /**
   * Evaluates a {@code DictionaryNameFinder}, built from the dictionary returned by
   * {@code createDictionary()}, against the stream returned by {@code createSample()} and
   * expects both the F-measure and the recall score to be exactly 1.
   */
  @Test
  public void testEvaluator() throws IOException, URISyntaxException {
    DictionaryNameFinder finder = new DictionaryNameFinder(createDictionary());
    NameEvaluationErrorListener errorListener = new NameEvaluationErrorListener();
    TokenNameFinderEvaluator scorer = new TokenNameFinderEvaluator(finder, errorListener);

    ObjectStream<NameSample> samples = createSample();
    scorer.evaluate(samples);
    samples.close();

    FMeasure result = scorer.getFMeasure();

    assertTrue(result.getFMeasure() == 1.0d);
    assertTrue(result.getRecallScore() == 1.0d);
  }
  /**
   * Builds a dictionary from the census name data and serializes it to the dictionary file.
   *
   * <p>The input file, output file and encoding are taken from the parsed {@code Parameters}.
   * The name stream is always closed after the dictionary has been created; a failure of
   * that close is deliberately ignored. A failure to close the written dictionary file is
   * reported, because the file may then be incomplete.
   *
   * @param args command line arguments, validated and parsed into {@code Parameters}
   * @throws TerminateToolException if reading the name data, writing the dictionary or
   *     closing the dictionary file fails with an {@code IOException}
   */
  public void run(String[] args) {
    Parameters params = validateAndParseParams(args, Parameters.class);

    File censusFile = new File(params.getCensusData());
    File dictionaryFile = new File(params.getDict());

    CmdLineUtil.checkInputFile("Name data", censusFile);
    CmdLineUtil.checkOutputFile("Dictionary file", dictionaryFile);

    FileInputStream censusIn = CmdLineUtil.openInFile(censusFile);
    Charset encoding = Charset.forName(params.getEncoding());
    ObjectStream<StringList> names = new NameFinderCensus90NameStream(censusIn, encoding);

    Dictionary dictionary;
    try {
      System.out.println("Creating Dictionary...");
      dictionary = createDictionary(names);
    } catch (IOException e) {
      String message = "IO error while reading training data or indexing data: " + e.getMessage();
      throw new TerminateToolException(-1, message, e);
    } finally {
      try {
        names.close();
      } catch (IOException ignored) {
        // Best effort only: a failed close of the input is not reported.
      }
    }

    System.out.println("Saving Dictionary...");

    OutputStream dictionaryOut = null;

    try {
      dictionaryOut = new FileOutputStream(dictionaryFile);
      dictionary.serialize(dictionaryOut);
    } catch (IOException e) {
      String message = "IO error while writing dictionary file: " + e.getMessage();
      throw new TerminateToolException(-1, message, e);
    } finally {
      if (dictionaryOut != null) {
        try {
          dictionaryOut.close();
        } catch (IOException e) {
          // The written file might be damaged, so this failure is surfaced.
          String message = "Attention: Failed to correctly write dictionary:" + e.getMessage();
          throw new TerminateToolException(-1, message, e);
        }
      }
    }
  }
  /**
   * Builds a dictionary from every entry read from the given stream.
   *
   * <p>Entries are read until the stream returns {@code null}. An entry is put into the
   * dictionary only when the dictionary does not already contain it. The stream is not
   * closed by this method.
   *
   * @param sampleStream stream of entries, read until it is exhausted
   * @return the {@code Dictionary} holding the entries read from {@code sampleStream}
   * @throws IOException if reading from {@code sampleStream} fails
   */
  public static Dictionary createDictionary(ObjectStream<StringList> sampleStream)
      throws IOException {

    Dictionary names = new Dictionary(true);

    for (StringList name = sampleStream.read(); name != null; name = sampleStream.read()) {
      if (names.contains(name)) {
        continue;
      }
      names.put(name);
    }

    return names;
  }
 /**
  * Creates a dictionary with all names from the sample data.
  *
  * <p>Every sample from {@code createSample()} is read; for each name span of a sample the
  * covered tokens are copied out of the sentence. After the stream has been closed, each
  * collected token array is put into the dictionary as a {@code StringList}.
  *
  * @return a dictionary filled with the tokens of every name span in the sample data
  * @throws IOException if creating, reading or closing the sample stream fails
  * @throws URISyntaxException if raised while creating the sample stream (presumably by
  *     {@code createSample()})
  */
 private static Dictionary createDictionary() throws IOException, URISyntaxException {
   ObjectStream<NameSample> samples = createSample();
   List<String[]> collected = new ArrayList<String[]>();

   for (NameSample current = samples.read(); current != null; current = samples.read()) {
     Span[] spans = current.getNames();
     if (spans == null || spans.length == 0) {
       // Sample without any annotated name.
       continue;
     }

     String[] tokens = current.getSentence();
     for (Span span : spans) {
       int width = span.length();
       String[] nameTokens = new String[width];
       System.arraycopy(tokens, span.getStart(), nameTokens, 0, width);
       collected.add(nameTokens);
     }
   }
   samples.close();

   Dictionary result = new Dictionary(true);
   for (String[] tokensOfName : collected) {
     result.put(new StringList(tokensOfName));
   }
   return result;
 }
 /**
  * Delegates to {@code adSentenceStream.close()}.
  *
  * @throws IOException if the delegated {@code close()} call throws it
  */
 public void close() throws IOException {
   adSentenceStream.close();
 }
 /**
  * Delegates to {@code adSentenceStream.reset()}.
  *
  * @throws IOException if the delegated {@code reset()} call throws it
  * @throws UnsupportedOperationException if the delegated {@code reset()} call throws it
  *     (presumably when the underlying stream cannot be reset; confirm against its contract)
  */
 public void reset() throws IOException, UnsupportedOperationException {
   adSentenceStream.reset();
 }