コード例 #1
0
  /**
   * Splits a sentence into phrases by cutting its tagged tokens at punctuation boundaries.
   *
   * <p>The sentence is tagged via {@code StanfordNER.parse}. Tokens whose word is one of {@code ,}
   * {@code "} {@code ''} {@code ``} or {@code --} act as separators: they are dropped, and the
   * tokens collected since the previous separator form one phrase. Every non-empty phrase is
   * passed through {@code Preprocess} before being added to the result; empty runs between
   * adjacent separators are skipped.
   *
   * @param sentence the raw sentence text
   * @param lp parser forwarded to {@code StanfordNER.parse}
   * @param classifier classifier forwarded to {@code StanfordNER.parse}
   * @return the preprocessed phrases, in the order they occur in the sentence
   */
  public static ArrayList<ArrayList<TaggedWord>> getPhrasesNaive(
      String sentence, LexicalizedParser lp, AbstractSequenceClassifier<CoreLabel> classifier) {
    ArrayList<TaggedWord> taggedWords = StanfordNER.parse(sentence, lp, classifier);

    // Only key membership is ever queried; each separator is stored as its own value.
    HashMap<String, String> separators = new HashMap<String, String>();
    for (String mark : new String[] {",", "\"", "''", "``", "--"}) {
      separators.put(mark, mark);
    }

    ArrayList<ArrayList<TaggedWord>> phrases = new ArrayList<ArrayList<TaggedWord>>();
    ArrayList<TaggedWord> current = new ArrayList<TaggedWord>();
    for (TaggedWord token : taggedWords) {
      if (!separators.containsKey(token.word())) {
        current.add(token);
        continue;
      }
      // Separator reached: emit what has been gathered (if anything) and start afresh.
      // A brand-new list is started so the emitted phrase is never touched again here.
      if (!current.isEmpty()) {
        phrases.add(Preprocess(current));
        current = new ArrayList<TaggedWord>();
      }
    }

    // Tokens after the last separator form the final phrase.
    if (!current.isEmpty()) {
      phrases.add(Preprocess(current));
    }
    return phrases;
  }
コード例 #2
0
  /**
   * Scores the lexical similarity of two raw sentences (original note: "mode - 2level").
   *
   * <p>Each sentence is parsed with {@code StanfordNER.parseNERMWE}, the result is passed through
   * {@code Preprocess}, and the two resulting token lists are scored by
   * {@code LexicalSimilarityScoreMax}. Earlier variants of this method used
   * {@code StanfordNER.parse} for tagging and the list-based {@code LexicalSimilarityScore}
   * overload for scoring.
   *
   * @param sentence1 the first sentence
   * @param sentence2 the second sentence
   * @param discoRAM similarity resource forwarded to the scorer
   * @param lp parser forwarded to both the tagging step and the scorer
   * @param classifier classifier forwarded to the tagging step
   * @return the value returned by {@code LexicalSimilarityScoreMax} for the two token lists
   */
  public static double LexicalSimilarityScore(
      String sentence1,
      String sentence2,
      DISCOSimilarity discoRAM,
      LexicalizedParser lp,
      AbstractSequenceClassifier<CoreLabel> classifier) {
    // The first sentence is tagged and preprocessed completely before the second one.
    final ArrayList<TaggedWord> firstTokens =
        Preprocess(StanfordNER.parseNERMWE(sentence1, lp, classifier));
    final ArrayList<TaggedWord> secondTokens =
        Preprocess(StanfordNER.parseNERMWE(sentence2, lp, classifier));

    return LexicalSimilarityScoreMax(firstTokens, secondTokens, discoRAM, lp);
  }