public static double LexicalSimilarityScoreMin(
      ArrayList<TaggedWord> taggedWords1,
      ArrayList<TaggedWord> taggedWords2,
      DISCOSimilarity discoRAM,
      LexicalizedParser lp) {

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String word2 = taggedWords2.get(j).word();
        double edgeWeight = 0;

        // LSA Similarity
        // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);

        // DISCO Similarity
        // DISCOSimilarity discoObj = new DISCOSimilarity();
        try {
          if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
          else {
            edgeWeight = discoRAM.similarity2(word1, word2);
            // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);
          }
        } catch (Exception ex) {
          ex.printStackTrace();
        }

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    int minLength = Math.min(length1, length2);
    finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / minLength * 5;
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/arrSize * 5;

    return finalScore;
  }
Пример #2
0
  public Object formResult() {
    Set brs = new HashSet();
    Set urs = new HashSet();
    // scan each rule / history pair
    int ruleCount = 0;
    for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
      if (ruleCount % 100 == 0) {
        System.err.println("Rules multiplied: " + ruleCount);
      }
      ruleCount++;
      Pair rulePair = (Pair) pairI.next();
      Rule baseRule = (Rule) rulePair.first;
      String baseLabel = (String) ruleToLabel.get(baseRule);
      List history = (List) rulePair.second;
      double totalProb = 0;
      for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
        List subHistory = history.subList(0, depth);
        double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
        double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
        // System.out.println("Multiplying out "+baseRule+" with history "+subHistory);
        // System.out.println("Count of "+baseLabel+" with "+subHistory+" is "+c_label);
        // System.out.println("Count of "+baseRule+" with "+subHistory+" is "+c_rule );

        double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
        totalProb += prob;
        for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
          Rule rule = specifyRule(baseRule, subHistory, childDepth);
          rule.score = (float) Math.log(totalProb);
          // System.out.println("Created  "+rule+" with score "+rule.score);
          if (rule instanceof UnaryRule) {
            urs.add(rule);
          } else {
            brs.add(rule);
          }
        }
      }
    }
    System.out.println("Total states: " + stateNumberer.total());
    BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
    UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
    for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
      BinaryRule br = (BinaryRule) brI.next();
      bg.addRule(br);
    }
    for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
      UnaryRule ur = (UnaryRule) urI.next();
      ug.addRule(ur);
    }
    return new Pair(ug, bg);
  }
  public static double LexicalSimilarityScoreMax(
      ArrayList<TaggedWord> taggedWords1,
      ArrayList<TaggedWord> taggedWords2,
      DISCOSimilarity discoRAM,
      LexicalizedParser lp) {

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String posTag1 = taggedWords1.get(i).tag();
        String word2 = taggedWords2.get(j).word();
        String posTag2 = taggedWords2.get(j).tag();

        ArrayList<TaggedWord> newList1 = new ArrayList<TaggedWord>();
        if (posTag1.length() >= 3 && posTag1.substring(0, 3).equals("NNP")) {
          newList1.add(taggedWords1.get(i));
        } else {
          String[] words = word1.split(" ");
          for (int k = 0; k < words.length; k++) newList1.add(new TaggedWord(words[k], posTag1));
        }

        ArrayList<TaggedWord> newList2 = new ArrayList<TaggedWord>();
        if (posTag2.length() >= 3 && posTag2.substring(0, 3).equals("NNP")) {
          newList2.add(taggedWords2.get(j));
        } else {
          String[] words = word2.split(" ");
          for (int k = 0; k < words.length; k++) newList2.add(new TaggedWord(words[k], posTag2));
        }

        double edgeWeight = LexicalSimilarityScoreMin(newList1, newList2, discoRAM, lp);

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    // int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }
Пример #4
0
 protected void tallyInternalNode(Tree lt, List parents) {
   // form base rule
   String label = lt.label().value();
   Rule baseR = ltToRule(lt);
   ruleToLabel.put(baseR, label);
   // act on each history depth
   for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size());
       depth <= maxDepth;
       depth++) {
     List history = new ArrayList(parents.subList(0, depth));
     // tally each history level / rewrite pair
     rulePairs.incrementCount(new Pair(baseR, history), 1);
     labelPairs.incrementCount(new Pair(label, history), 1);
   }
 }
  public static double LexicalSimilarity2Level(
      String sentence1, String sentence2, DISCOSimilarity discoRAM, LexicalizedParser lp) {
    Tree parse1 = lp.apply(sentence1);
    Tree parse2 = lp.apply(sentence2);

    int phraseSizeLimit = 2;

    ArrayList<ArrayList<TaggedWord>> phrasesList1 = getPhrases(parse1, phraseSizeLimit);
    ArrayList<ArrayList<TaggedWord>> phrasesList2 = getPhrases(parse2, phraseSizeLimit);

    int length1 = phrasesList1.size();
    int length2 = phrasesList2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        double edgeWeight = 0;
        ArrayList<TaggedWord> taggedWords1 = phrasesList1.get(i);
        ArrayList<TaggedWord> taggedWords2 = phrasesList2.get(j);
        // edgeWeight = LexicalSimilarityScore(taggedWords1, taggedWords2, discoRAM, lp)/5.0;
        edgeWeight = BestWordMatchEdgeWeight(taggedWords1, taggedWords2, discoRAM);

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    // int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    if (arrSize == 0) finalScore = 0;
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }
  public static double LexicalSimilarityScoreWordNet(
      String sentence1, String sentence2, LeskWSD tm, LexicalizedParser lp, WordNetSimilarity ws) {

    ArrayList<TaggedWord> taggedWordsPrev1 = Preprocess(StanfordParse(sentence1, lp));
    ArrayList<TaggedWord> taggedWordsPrev2 = Preprocess(StanfordParse(sentence2, lp));
    ArrayList<TaggedWord> taggedWords1 = new ArrayList<TaggedWord>();
    ArrayList<TaggedWord> taggedWords2 = new ArrayList<TaggedWord>();

    WordNetSense[] sensesPrev1 = tm.LeskJWI(sentence1);
    WordNetSense[] sensesPrev2 = tm.LeskJWI(sentence2);

    // System.out.println("Senses found!");

    ArrayList<WordNetSense> senses1 = new ArrayList<WordNetSense>();
    ArrayList<WordNetSense> senses2 = new ArrayList<WordNetSense>();

    for (int i = 0; i < taggedWordsPrev1.size(); i++) {
      String word = taggedWordsPrev1.get(i).word();
      String posTag = taggedWordsPrev1.get(i).tag();
      if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
        taggedWords1.add(new TaggedWord(word, "NN"));
        senses1.add(sensesPrev1[i]);
      } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
        taggedWords1.add(new TaggedWord(word, "VB"));
        senses1.add(sensesPrev1[i]);
      }
    }
    for (int i = 0; i < taggedWordsPrev2.size(); i++) {
      String word = taggedWordsPrev2.get(i).word();
      String posTag = taggedWordsPrev2.get(i).tag();
      if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
        taggedWords2.add(new TaggedWord(word, "NN"));
        senses2.add(sensesPrev2[i]);
      } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
        taggedWords2.add(new TaggedWord(word, "VB"));
        senses2.add(sensesPrev2[i]);
      }
    }

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String posTag1 = taggedWords1.get(i).tag();
        String word2 = taggedWords2.get(j).word();
        String posTag2 = taggedWords2.get(j).tag();
        double edgeWeight = 0;

        // LSA Similarity
        // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);

        // DISCO Similarity
        // DISCOSimilarity discoObj = new DISCOSimilarity();
        try {
          if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
          else {
            // edgeWeight = ws.wuPalmerSimilarity(senses1.get(i), senses2.get(j));
            edgeWeight = ws.linSimilarity(senses1.get(i), senses2.get(j));
          }
        } catch (Exception ex) {
          ex.printStackTrace();
        }

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    if (arrSize == 0) finalScore = 0;
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }