/**
 * Evaluates a {@code DictionaryNameFinder} backed by the dictionary from
 * {@code createDictionary()} against the samples from {@code createSample()} and expects
 * both the F-measure and the recall score to be exactly 1.
 */
@Test
  public void testEvaluator() throws IOException, URISyntaxException {
    DictionaryNameFinder nameFinder = new DictionaryNameFinder(createDictionary());
    TokenNameFinderEvaluator evaluator =
        new TokenNameFinderEvaluator(nameFinder, new NameEvaluationErrorListener());
    ObjectStream<NameSample> sample = createSample();

    // Close the sample stream even when the evaluation itself fails, so a failing
    // run does not leave the underlying resource open.
    try {
      evaluator.evaluate(sample);
    } finally {
      sample.close();
    }

    FMeasure fmeasure = evaluator.getFMeasure();

    assertTrue(fmeasure.getFMeasure() == 1);
    assertTrue(fmeasure.getRecallScore() == 1);
  }
예제 #2
0
  /**
   * Builds a dictionary from the census name data and writes it to the dictionary file.
   *
   * <p>The arguments are parsed into {@code Parameters}; the input and output files are
   * checked through {@code CmdLineUtil}; the name data is read with a
   * {@code NameFinderCensus90NameStream} using the configured encoding; and the resulting
   * dictionary is serialized to the output file.
   *
   * @param args the command line arguments, parsed by {@code validateAndParseParams}
   * @throws TerminateToolException if an {@code IOException} occurs while the dictionary is
   *     created, while it is written, or while the successfully written file is closed
   */
  public void run(String[] args) {
    Parameters params = validateAndParseParams(args, Parameters.class);

    File testData = new File(params.getCensusData());
    File dictOutFile = new File(params.getDict());

    CmdLineUtil.checkInputFile("Name data", testData);
    CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile);

    FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
    ObjectStream<StringList> sampleStream =
        new NameFinderCensus90NameStream(sampleDataIn, Charset.forName(params.getEncoding()));

    Dictionary dictionary;
    try {
      System.out.println("Creating Dictionary...");
      dictionary = createDictionary(sampleStream);
    } catch (IOException e) {
      throw new TerminateToolException(
          -1, "IO error while reading training data or indexing data: " + e.getMessage(), e);
    } finally {
      try {
        sampleStream.close();
      } catch (IOException ignored) {
        // Best effort: the input has already been consumed (or its failure is already
        // being reported), so a failing close on the read side is deliberately ignored.
      }
    }

    System.out.println("Saving Dictionary...");

    OutputStream out = null;
    // Tracks whether serialization finished, so a close failure is only reported when it
    // is the first problem and never replaces an exception that is already propagating.
    boolean written = false;

    try {
      out = new FileOutputStream(dictOutFile);
      dictionary.serialize(out);
      written = true;
    } catch (IOException e) {
      throw new TerminateToolException(
          -1, "IO error while writing dictionary file: " + e.getMessage(), e);
    } finally {
      if (out != null) {
        try {
          out.close();
        } catch (IOException e) {
          if (written) {
            // file might be damaged
            throw new TerminateToolException(
                -1, "Attention: Failed to correctly write dictionary:" + e.getMessage(), e);
          }
          // The write already failed; throwing here would discard that original,
          // more informative exception, so the close failure is dropped instead.
        }
      }
    }
  }
 /**
  * Creates a dictionary with all names from the sample data.
  *
  * <p>Every sample returned by {@code createSample()} is read; for each name span in a
  * sample, the sentence tokens covered by that span are copied and added to the dictionary
  * as one entry.
  *
  * @return a dictionary holding one entry per name span found in the samples
  * @throws IOException if reading from or closing the sample stream fails
  * @throws URISyntaxException declared for the sample set-up; presumably raised by
  *     {@code createSample()} (confirm against that method)
  */
 private static Dictionary createDictionary() throws IOException, URISyntaxException {
   // NOTE(review): the 'true' flag presumably selects case-sensitive matching;
   // confirm against the Dictionary constructor.
   Dictionary dictionary = new Dictionary(true);
   ObjectStream<NameSample> sampleStream = createSample();
   // Close the stream even when read() fails part-way through the data.
   try {
     NameSample sample = sampleStream.read();
     while (sample != null) {
       Span[] names = sample.getNames();
       if (names != null && names.length > 0) {
         String[] toks = sample.getSentence();
         for (Span name : names) {
           // Copy only the tokens covered by this name span.
           String[] nameToks = new String[name.length()];
           System.arraycopy(toks, name.getStart(), nameToks, 0, name.length());
           dictionary.put(new StringList(nameToks));
         }
       }
       sample = sampleStream.read();
     }
   } finally {
     sampleStream.close();
   }
   return dictionary;
 }
예제 #4
0
 /**
  * Closes this stream by closing the wrapped {@code adSentenceStream}.
  *
  * @throws IOException if closing {@code adSentenceStream} fails
  */
 public void close() throws IOException {
   adSentenceStream.close();
 }