/**
 * Collects the sentences of the current section that mention this API element.
 *
 * <p>The API element name is normalised by removing parentheses, replacing every dot with a
 * hyphen and lower-casing the result. A sentence is selected when its lower-cased string form
 * contains {@code "clt_"} immediately followed by that normalised name.
 *
 * @param parse not read by this method; kept so existing callers keep compiling
 * @return the matching sentences in their original order; an empty list when nothing matches
 *     or when the section has no sentence list at all (never {@code null})
 */
public List<CoreMap> getAPIElementSentences(boolean parse) {
    List<CoreMap> apiSentences = new ArrayList<CoreMap>();

    List<CoreMap> sentences = section.sentences;
    if (sentences == null) {
      // The warning used to be followed by a loop over the null list, which threw a
      // NullPointerException. There is nothing to search, so report and return empty.
      System.out.println("Warning");
      return apiSentences;
    }

    // Literal replacements: no regular expression is needed to drop "(" / ")" or swap "." for "-".
    String formattedAPI =
        apiElement
            .getAPIElementName()
            .replace("(", "")
            .replace(")", "")
            .replace('.', '-')
            .toLowerCase();

    // The marker does not change between sentences, so build it once.
    String marker = "clt_" + formattedAPI;
    for (CoreMap sent : sentences) {
      if (sent.toString().toLowerCase().contains(marker)) {
        apiSentences.add(sent);
      }
    }

    if (apiSentences.isEmpty()) {
      System.out.println(
          "WARNGING: In getAPIElementSentences "
              + apiElement.getAPIElementName()
              + ","
              + section.getSubTitle());
    }
    return apiSentences;
  }
Esempio n. 2
0
  /**
   * Turns every thread into a {@code ThreadVector} of sentence vectors and updates the
   * dictionary statistics along the way.
   *
   * <p>For each thread a new {@code ThreadVector} is created and appended to {@code allThreads}.
   * For each of its emails, the text of every sentence whose quotation count is zero is joined
   * (each followed by a single space), lower-cased and run through {@code pipeline}. For every
   * sentence the pipeline reports, the token lemmas are gathered and handed to
   * {@code countWordsInSentence}. When the resulting map is non-empty:
   *
   * <ul>
   *   <li>{@code totalSentenceNumber} is incremented;
   *   <li>each word not yet in {@code dictionaryIndex} is registered there with the current
   *       dictionary size as its index and given a count of 1 in
   *       {@code dictionaryDocumentCount}; each word already indexed has that count incremented;
   *   <li>a {@code SentenceVector} built from the sentence's string form and the word counts is
   *       added to the thread's vector.
   * </ul>
   *
   * @param threads the threads to process
   */
  private void parseThread(ArrayList<Thread> threads) {
    for (Thread thread : threads) {
      ThreadVector threadVector = new ThreadVector(thread);
      allThreads.add(threadVector);

      for (Email email : thread.getEmails()) {
        // Keep only sentences with a quotation count of zero
        // (presumably the email's own text rather than quoted material -- confirm).
        StringBuilder ownText = new StringBuilder();
        for (Sentence emailSentence : email.getSentences()) {
          if (s_isQuoted(emailSentence.getQuotationTimes() != 0)) {
            continue;
          }
          ownText.append(emailSentence.getText()).append(" ");
        }

        // Wrap the lower-cased text in a fresh Annotation and let the pipeline annotate it.
        Annotation document = new Annotation(ownText.toString().toLowerCase());
        this.pipeline.annotate(document);

        List<CoreMap> annotatedSentences = document.get(SentencesAnnotation.class);
        for (CoreMap annotated : annotatedSentences) {
          // Gather the lemma of every token in this sentence.
          List<String> lemmas = new LinkedList<String>();
          for (CoreLabel token : annotated.get(TokensAnnotation.class)) {
            lemmas.add(token.get(LemmaAnnotation.class));
          }

          HashMap<String, Integer> wordCount = countWordsInSentence(lemmas);
          if (wordCount.isEmpty()) {
            // No counted words: the sentence contributes nothing.
            continue;
          }

          totalSentenceNumber++;
          for (String word : wordCount.keySet()) {
            if (dictionaryIndex.containsKey(word)) {
              dictionaryDocumentCount.put(word, dictionaryDocumentCount.get(word) + 1);
            } else {
              dictionaryIndex.put(word, dictionaryIndex.size());
              dictionaryDocumentCount.put(word, 1);
            }
          }
          threadVector.addSentenceVectors(new SentenceVector(annotated.toString(), wordCount));
        }
      }
    }
  }

  /** Returns its argument unchanged; names the "sentence is quoted" condition at the call site. */
  private static boolean s_isQuoted(boolean quoted) {
    return quoted;
  }
Esempio n. 3
0
  /**
   * Extracts relations from one sentence, prints a diagnostic trace to standard output and
   * writes a single tab-separated result line to {@code ps}.
   *
   * <p>Phase one calls {@code relationExtraction.extractRelations} on the normalized sentence,
   * prints the annotated sentence, its collapsed CC-processed dependency graph (when present)
   * and every extracted instance, then replaces the instances with the result of
   * {@code extractBinaryRelations}. Any exception raised in this phase is printed and the method
   * carries on with whatever {@code instances} held at that moment.
   *
   * <p>Phase two walks the remaining instances. An instance matches when at least one argument's
   * entity name ends with {@code PLACEHOLDER_ENTITY1} and at least one ends with
   * {@code PLACEHOLDER_ENTITY2}. The original relations of all matching instances are joined
   * with single spaces.
   *
   * <p>The line written to {@code ps} is
   * {@code entity1 TAB relations TAB entity2 TAB originalSentence}, where {@code relations} is
   * the trimmed joined text, or {@code ---} when no instance matched.
   *
   * @param normalizedSentence sentence text passed to the relation extractor
   * @param originalSentence sentence text echoed in the output line
   * @param entity1 first column of the output line
   * @param entity2 third column of the output line
   * @throws FileNotFoundException declared by the signature; no statement visible in this method
   *     raises it outside the catch-all block
   * @throws UnsupportedEncodingException declared by the signature; same remark as above
   */
  private void evaluate(
      String normalizedSentence, String originalSentence, String entity1, String entity2)
      throws FileNotFoundException, UnsupportedEncodingException {
    List<RelationInstance> instances = null;

    try {
      instances = relationExtraction.extractRelations(normalizedSentence, true);
      if (!instances.isEmpty()) {
        CoreMap annotated = instances.get(0).getAnnotatedSentence();
        SemanticGraph graph = annotated.get(CollapsedCCProcessedDependenciesAnnotation.class);
        System.out.println("========== Sentence =========");
        System.out.println(annotated.toString());
        if (graph != null) {
          System.out.println(graph.toFormattedString());
        }
        System.out.println("======= N-ary Instaces ======");
        for (RelationInstance nary : instances) {
          System.out.println(nary);
        }
      }

      instances = extractBinaryRelations(instances);
    } catch (Exception e) {
      // Best effort: report the failure and continue with the current value of `instances`.
      e.printStackTrace();
      System.out.println("Resuming...");
    }

    String joinedRelations = "";
    boolean matched = false;

    boolean haveInstances = instances != null && !instances.isEmpty();
    if (haveInstances) {
      System.out.println("======= Binary Instaces ======");

      for (RelationInstance candidate : instances) {
        System.out.println("Instance: " + candidate.getOriginalRelation());

        boolean sawEntity1 = false;
        boolean sawEntity2 = false;
        for (Argument arg : candidate.getArguments()) {
          System.out.println("\tArg: [" + arg.getEntityId() + "] - Type: " + arg.getArgumentType());
          // endsWith (formerly contains) works around badly annotated ground-truth entities
          // such as Andre [[[Agassi]]].
          sawEntity1 |= arg.getEntityName().endsWith(PLACEHOLDER_ENTITY1);
          sawEntity2 |= arg.getEntityName().endsWith(PLACEHOLDER_ENTITY2);
        }

        if (!(sawEntity1 && sawEntity2)) {
          continue;
        }

        // The first matching relation stands alone; later ones are appended after one space.
        joinedRelations =
            joinedRelations.isEmpty()
                ? candidate.getOriginalRelation()
                : joinedRelations + " " + candidate.getOriginalRelation();
        matched = true;
      }
    }

    // "---" marks a sentence for which no instance linked both placeholder entities.
    String relationColumn = matched ? joinedRelations.trim() : "---";
    ps.println(entity1 + "\t" + relationColumn + "\t" + entity2 + "\t" + originalSentence);
  }