예제 #1
0
  /**
   * Loads the model from disk.
   *
   * @param path The location of model that was saved to disk
   * @throws ClassCastException if model is the wrong format
   * @throws IOException if the model file doesn't exist or is otherwise unavailable/incomplete
   * @throws ClassNotFoundException this would probably indicate a serious classpath problem
   */
  public static BasicEntityExtractor load(
      String path,
      Class<? extends BasicEntityExtractor> entityClassifier,
      boolean preferDefaultGazetteer)
      throws ClassCastException, IOException, ClassNotFoundException {

    // load the additional arguments
    // try to load the extra file from the CLASSPATH first
    InputStream is =
        BasicEntityExtractor.class.getClassLoader().getResourceAsStream(path + ".extra");
    // if not found in the CLASSPATH, load from the file system
    if (is == null) is = new FileInputStream(path + ".extra");
    ObjectInputStream in = new ObjectInputStream(is);
    String gazetteerLocation = ErasureUtils.<String>uncheckedCast(in.readObject());
    if (preferDefaultGazetteer) gazetteerLocation = DefaultPaths.DEFAULT_NFL_GAZETTEER;
    Set<String> annotationsToSkip = ErasureUtils.<Set<String>>uncheckedCast(in.readObject());
    Boolean useSubTypes = ErasureUtils.<Boolean>uncheckedCast(in.readObject());
    Boolean useBIO = ErasureUtils.<Boolean>uncheckedCast(in.readObject());
    in.close();
    is.close();

    BasicEntityExtractor extractor =
        (BasicEntityExtractor)
            MachineReading.makeEntityExtractor(entityClassifier, gazetteerLocation);

    // load the CRF classifier (this works from any resource, e.g., classpath or file system)
    extractor.classifier = CRFClassifier.getClassifier(path);

    // copy the extra arguments
    extractor.annotationsToSkip = annotationsToSkip;
    extractor.useSubTypes = useSubTypes;
    extractor.useBIO = useBIO;

    return extractor;
  }
예제 #2
0
  /**
   * Annotate an ExtractionDataSet with entities. This will modify the ExtractionDataSet in place.
   *
   * @param doc The dataset to label
   */
  @Override
  public void annotate(Annotation doc) {
    if (SAVE_CONLL_2003) {
      // dump a file in CoNLL-2003 format
      try {
        PrintStream os = new PrintStream(new FileOutputStream("test.conll"));
        List<List<CoreLabel>> labels =
            AnnotationUtils.entityMentionsToCoreLabels(doc, annotationsToSkip, useSubTypes, useBIO);
        BasicEntityExtractor.saveCoNLL(os, labels, true);
        // saveCoNLLFiles("/tmp/ace/test", doc, useSubTypes, useBIO);
        os.close();
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
    }

    List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
    int sentCount = 1;
    for (CoreMap sentence : sents) {
      if (useNERTags) {
        this.makeAnnotationFromAllNERTags(sentence);
      } else extractEntities(sentence, sentCount);
      sentCount++;
    }

    /*
    if(SAVE_CONLL_2003){
      try {
        saveCoNLLFiles("test_output/", doc, useSubTypes, useBIO);
        System.err.println("useBIO = " + useBIO);
      } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
      }
    }
    */
  }