/**
 * Runs the trained extractor over every sentence in the corpus and returns the
 * de-duplicated list of extractions whose relation is one of the target relations.
 */
private static List<Extraction> getExtractions(Corpus c, ArgumentIdentification ai,
        SententialInstanceGeneration sig, DocumentExtractor de) throws SQLException, IOException {
    List<Extraction> extrs = new ArrayList<Extraction>();
    Iterator<Annotation> docs = c.getDocumentIterator();
    Map<Integer, String> ftID2ftMap = ModelUtils.getFeatureIDToFeatureMap(de.getMapping());
    while (docs.hasNext()) {
        Annotation doc = docs.next();
        List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        int sentenceCount = 1;
        for (CoreMap sentence : sentences) {
            // argument identification
            List<Argument> arguments = ai.identifyArguments(doc, sentence);
            // sentential instance generation
            List<Pair<Argument, Argument>> sententialInstances =
                    sig.generateSententialInstances(arguments, sentence);
            for (Pair<Argument, Argument> p : sententialInstances) {
                Pair<Triple<String, Double, Double>, Map<Integer, Double>> extrResult =
                        de.extractFromSententialInstanceWithFeatureScores(p.first, p.second, sentence, doc);
                if (extrResult != null) {
                    Triple<String, Double, Double> extrScoreTriple = extrResult.first;
                    Map<Integer, Double> featureScores = extrResult.second;
                    String rel = extrScoreTriple.first;
                    // keep only extractions whose predicted relation is a target relation
                    if (targetRelations.contains(rel)) {
                        String docName = sentence.get(SentDocName.class);
                        String senText = sentence.get(CoreAnnotations.TextAnnotation.class);
                        Integer sentNum = sentence.get(SentGlobalID.class);
                        Extraction e = new Extraction(p.first, p.second, docName, rel,
                                sentNum, extrScoreTriple.third, senText);
                        e.setFeatureScoreList(
                                EvaluationUtils.getFeatureScoreList(featureScores, ftID2ftMap));
                        extrs.add(e);
                    }
                }
            }
            sentenceCount++;
        }
    }
    return EvaluationUtils.getUniqueList(extrs);
}
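/*
 * Sketch of the expected positional arguments, inferred from the reads in main()
 * below. The CLIUtils.load* helpers presumably consume their own option-style
 * arguments first, so the indices refer to what remains afterwards; this is an
 * illustrative summary, not authoritative project documentation.
 *
 *   arguments.get(0)  path to the test corpus (passed to the Corpus constructor)
 *   arguments.get(1)  path to the trained multir model
 *   arguments.get(2)  annotations input file
 *   arguments.get(3)  file listing the target evaluation relations
 *   arguments.get(4)  optional: "train" or "test", selecting the corpus partition
 *   arguments.get(5)  optional: file listing the names of the test documents
 */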
public static void main(String[] args) throws ParseException, ClassNotFoundException,
        InstantiationException, IllegalAccessException, NoSuchMethodException, SecurityException,
        IllegalArgumentException, InvocationTargetException, SQLException, IOException {
    List<String> arguments = new ArrayList<String>();
    for (String arg : args) {
        arguments.add(arg);
    }
    CorpusInformationSpecification cis = CLIUtils.loadCorpusInformationSpecification(arguments);
    FeatureGenerator fg = CLIUtils.loadFeatureGenerator(arguments);
    ArgumentIdentification ai = CLIUtils.loadArgumentIdentification(arguments);
    SententialInstanceGeneration sig = CLIUtils.loadSententialInformationGeneration(arguments);

    String multirModelPath = arguments.get(1);
    String annotationsInputFilePath = arguments.get(2);
    String evaluationRelationsFilePath = arguments.get(3);
    targetRelations = EvaluationUtils.loadTargetRelations(evaluationRelationsFilePath);

    // load test corpus
    Corpus c = new Corpus(arguments.get(0), cis, true);
    DocumentExtractor de = new DocumentExtractor(multirModelPath, fg, ai, sig);

    // if the corpus object is the full corpus, we may restrict it to the train or test
    // partition based on an input file listing the names of the test documents
    if (arguments.size() == 6) {
        String corpusSetting = arguments.get(4);
        String pathToTestDocumentFile = arguments.get(5);
        if (!corpusSetting.equals("train") && !corpusSetting.equals("test")) {
            throw new IllegalArgumentException("The corpus setting argument must be \"train\" or \"test\"");
        }
        File f = new File(pathToTestDocumentFile);
        if (!f.exists() || !f.isFile()) {
            throw new IllegalArgumentException(
                    "File at " + pathToTestDocumentFile + " does not exist or is not a file");
        }
        if (corpusSetting.equals("train")) {
            c.setCorpusToTrain(pathToTestDocumentFile);
        } else {
            c.setCorpusToTest(pathToTestDocumentFile);
        }
    }

    // FIGER-based feature generators need the type system initialized before extraction
    if (fg instanceof DefaultFeatureGeneratorWithFIGER
            || fg instanceof DefaultFeatureGeneratorConcatFIGER
            || fg instanceof DefaultFeatureGeneratorIndepFIGER) {
        FigerTypeUtils.init();
    }

    long start = System.currentTimeMillis();
    List<Extraction> extractions = getExtractions(c, ai, sig, de);
    long end = System.currentTimeMillis();
    System.out.println("Got Extractions in " + (end - start) + " ms");

    start = end;
    List<ExtractionAnnotation> annotations = EvaluationUtils.loadAnnotations(annotationsInputFilePath);
    end = System.currentTimeMillis();
    System.out.println("Got Annotations in " + (end - start) + " ms");

    start = end;
    List<Extraction> diffExtractions = EvaluationUtils.getDiff(extractions, annotations);
    end = System.currentTimeMillis();
    System.out.println("Got diff in " + (end - start) + " ms");

    // optional debugging path: drop extractions that are not covered by the annotations
    // so that evaluation runs only on the fixed, fully annotated set
    boolean useFixedSet = false;
    if (useFixedSet) {
        for (int i = extractions.size() - 1; i > -1; i--) {
            if (diffExtractions.contains(extractions.get(i))) {
                System.out.println("removing");
                extractions.remove(i);
            }
        }
        diffExtractions.clear();
    }

    // if there is a diff, don't evaluate the algorithm yet
    if (diffExtractions.size() > 0) {
        // output diff
        String diffOutputName = annotationsInputFilePath + ".diff";
        EvaluationUtils.writeExtractions(diffExtractions, diffOutputName);
        throw new IllegalStateException(
                "inputAnnotations do not include all of the extractions, tag the diff at "
                        + diffOutputName + " and merge with annotations");
    } else {
        EvaluationUtils.eval(extractions, annotations, targetRelations);
        EvaluationUtils.relByRelEvaluation(extractions, annotations, targetRelations);
    }

    if (fg instanceof DefaultFeatureGeneratorWithFIGER
            || fg instanceof DefaultFeatureGeneratorConcatFIGER
            || fg instanceof DefaultFeatureGeneratorIndepFIGER) {
        FigerTypeUtils.close();
    }
}