Code Example #1
  /**
   * Scores the lexical similarity of two tagged word lists: builds a word-to-word weight matrix,
   * solves the optimal assignment with the Hungarian algorithm, and normalizes by the length of
   * the longer list (result scaled to 0..5).
   */
  public static double LexicalSimilarityScoreMax(
      ArrayList<TaggedWord> taggedWords1,
      ArrayList<TaggedWord> taggedWords2,
      DISCOSimilarity discoRAM,
      LexicalizedParser lp) {

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String posTag1 = taggedWords1.get(i).tag();
        String word2 = taggedWords2.get(j).word();
        String posTag2 = taggedWords2.get(j).tag();

        ArrayList<TaggedWord> newList1 = new ArrayList<TaggedWord>();
        if (posTag1.length() >= 3 && posTag1.substring(0, 3).equals("NNP")) {
          newList1.add(taggedWords1.get(i));
        } else {
          String[] words = word1.split(" ");
          for (int k = 0; k < words.length; k++) newList1.add(new TaggedWord(words[k], posTag1));
        }

        ArrayList<TaggedWord> newList2 = new ArrayList<TaggedWord>();
        if (posTag2.length() >= 3 && posTag2.substring(0, 3).equals("NNP")) {
          newList2.add(taggedWords2.get(j));
        } else {
          String[] words = word2.split(" ");
          for (int k = 0; k < words.length; k++) newList2.add(new TaggedWord(words[k], posTag2));
        }

        double edgeWeight = LexicalSimilarityScoreMin(newList1, newList2, discoRAM, lp);

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    // int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    if (arrSize == 0) finalScore = 0; // both word lists empty: avoid division by zero
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }
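
The score above is a normalized optimal assignment: the weight matrix is allocated as an arrSize x arrSize square, so words on the longer side that find no counterpart pair with zero-weight dummy cells. The following minimal sketch illustrates that assign-and-normalize step on made-up weights; the brute-force search is only a stand-in for the project's HungarianAlgorithm.hgAlgorithm(array, "max") and is usable on toy matrices only.

// Brute-force optimal assignment over a small square weight matrix; a stand-in for
// HungarianAlgorithm.hgAlgorithm(array, "max"). Exponential, so only for toy inputs.
public class AssignmentSketch {

  static double bestAssignment(double[][] w, int row, boolean[] usedCols) {
    if (row == w.length) return 0;
    double best = Double.NEGATIVE_INFINITY;
    for (int col = 0; col < w.length; col++) {
      if (usedCols[col]) continue;
      usedCols[col] = true;
      best = Math.max(best, w[row][col] + bestAssignment(w, row + 1, usedCols));
      usedCols[col] = false;
    }
    return best;
  }

  public static void main(String[] args) {
    // Two words matched against three: the matrix is padded to 3x3 with a zero-weight row.
    double[][] weights = {
      {1.0, 0.2, 0.1},
      {0.3, 0.8, 0.4},
      {0.0, 0.0, 0.0} // dummy row for the missing third word
    };
    int arrSize = weights.length;
    double sum = bestAssignment(weights, 0, new boolean[arrSize]); // 1.0 + 0.8 + 0.0 = 1.8
    System.out.println("score = " + sum / arrSize * 5); // 3.0, same normalization as above
  }
}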
Code Example #2
  /**
   * Word-level similarity of two tagged word lists: DISCO similarity per word pair (1.0 for a
   * case-insensitive exact match), optimal assignment via the Hungarian algorithm, normalized by
   * the length of the shorter list (result scaled to 0..5).
   */
  public static double LexicalSimilarityScoreMin(
      ArrayList<TaggedWord> taggedWords1,
      ArrayList<TaggedWord> taggedWords2,
      DISCOSimilarity discoRAM,
      LexicalizedParser lp) {

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String word2 = taggedWords2.get(j).word();
        double edgeWeight = 0;

        // LSA Similarity
        // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);

        // DISCO Similarity
        // DISCOSimilarity discoObj = new DISCOSimilarity();
        try {
          if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
          else {
            edgeWeight = discoRAM.similarity2(word1, word2);
            // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);
          }
        } catch (Exception ex) {
          ex.printStackTrace();
        }

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    int minLength = Math.min(length1, length2);
    if (minLength == 0) finalScore = 0; // at least one word list is empty: avoid division by zero
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / minLength * 5;
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/arrSize * 5;

    return finalScore;
  }
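
The only difference from Example #1 is the denominator: dividing by minLength scores the assignment against the shorter word list only, while dividing by arrSize also penalizes the length mismatch introduced by the zero-weight padding. A small sketch with made-up numbers (not taken from the source):

    // Illustrative only: assumes an optimal assignment sum of 1.5 for word lists of length 2 and 4
    // (the two zero-weight padding rows contribute nothing by construction).
    double assignmentSum = 1.5;
    int length1 = 2, length2 = 4;
    int minLength = Math.min(length1, length2); // 2
    int arrSize = Math.max(length1, length2);   // 4
    System.out.println(assignmentSum / minLength * 5); // 3.75  ("Min" variant, this method)
    System.out.println(assignmentSum / arrSize * 5);   // 1.875 ("Max" variant, Example #1)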
Code Example #3
  /**
   * Phrase-level similarity of two sentences: parses each sentence, extracts phrases of up to
   * phraseSizeLimit words, scores phrase pairs with BestWordMatchEdgeWeight, and normalizes the
   * optimal assignment by the larger phrase count (result scaled to 0..5).
   */
  public static double LexicalSimilarity2Level(
      String sentence1, String sentence2, DISCOSimilarity discoRAM, LexicalizedParser lp) {
    Tree parse1 = lp.apply(sentence1);
    Tree parse2 = lp.apply(sentence2);

    int phraseSizeLimit = 2;

    ArrayList<ArrayList<TaggedWord>> phrasesList1 = getPhrases(parse1, phraseSizeLimit);
    ArrayList<ArrayList<TaggedWord>> phrasesList2 = getPhrases(parse2, phraseSizeLimit);

    int length1 = phrasesList1.size();
    int length2 = phrasesList2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        double edgeWeight = 0;
        ArrayList<TaggedWord> taggedWords1 = phrasesList1.get(i);
        ArrayList<TaggedWord> taggedWords2 = phrasesList2.get(j);
        // edgeWeight = LexicalSimilarityScore(taggedWords1, taggedWords2, discoRAM, lp)/5.0;
        edgeWeight = BestWordMatchEdgeWeight(taggedWords1, taggedWords2, discoRAM);

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    // int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    if (arrSize == 0) finalScore = 0;
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }
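
A possible call site, sketched under assumptions: LexicalizedParser.loadModel and the model path are the standard Stanford CoreNLP way to obtain a parser, while the DISCOSimilarity setup is project-specific (a no-arg constructor is only hinted at by the commented-out new DISCOSimilarity() in Examples #2 and #5) and appears here as a placeholder.

    // Hypothetical usage sketch; the model path and the DISCOSimilarity setup are assumptions.
    LexicalizedParser lp =
        LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    DISCOSimilarity discoRAM = new DISCOSimilarity(); // placeholder: actual setup not shown here
    double score =
        LexicalSimilarity2Level(
            "The customer submits an order.", "An order is placed by the client.", discoRAM, lp);
    System.out.println("phrase-level similarity: " + score); // 0..5 after the normalization above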
Code Example #4
  /**
   * Maps the label lists of two graphs onto each other: scores every label pair with stemmed
   * token edit distance, solves the assignment with the Hungarian algorithm, and returns the
   * matched label pairs.
   */
  static ArrayList<StringPair> getMappingsLabels(
      ArrayList<String> g1Labels, ArrayList<String> g2Labels, double threshold) {

    SnowballStemmer englishStemmer = Settings.getStemmer("english");
    int dimFunc = g1Labels.size() > g2Labels.size() ? g1Labels.size() : g2Labels.size();
    double costs[][] = new double[dimFunc][dimFunc];
    double costsCopy[][] = new double[dimFunc][dimFunc];
    ArrayList<StringPair> solutionMappings = new ArrayList<StringPair>();

    if (g1Labels.size() == 0 || g2Labels.size() == 0) {
      return solutionMappings;
    }

    // function mapping score
    for (int i = 0; i < g1Labels.size(); i++) {
      for (int j = 0; j < g2Labels.size(); j++) {
        double edScore;

        edScore =
            LabelEditDistance.edTokensWithStemming(
                g1Labels.get(i), g2Labels.get(j), Settings.STRING_DELIMETER, englishStemmer, true);

        if (edScore < threshold) edScore = 1;

        costs[i][j] = edScore;
      }
    }

    for (int i = 0; i < costs.length; i++) {
      for (int j = 0; j < costs[0].length; j++) {
        costsCopy[i][j] = costs[i][j];
      }
    }

    int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

    for (int i = 0; i < result.length; i++) {
      // skip assignments that fall on padded (dummy) rows or columns of the square cost matrix
      if (result[i][0] < g1Labels.size() && result[i][1] < g2Labels.size()) {
        solutionMappings.add(
            new StringPair(g1Labels.get(result[i][0]), g2Labels.get(result[i][1])));
      }
    }

    return solutionMappings;
  }
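
A minimal call sketch with made-up labels and an arbitrary threshold; it assumes the enclosing class and Settings are on the classpath, as in the method body.

    // Hypothetical usage sketch with made-up process labels.
    ArrayList<String> g1Labels = new ArrayList<String>();
    g1Labels.add("approve invoice");
    g1Labels.add("send notification");
    ArrayList<String> g2Labels = new ArrayList<String>();
    g2Labels.add("invoice approval");
    g2Labels.add("notify customer");

    ArrayList<StringPair> pairs = getMappingsLabels(g1Labels, g2Labels, 0.5);
    for (StringPair pair : pairs) {
      System.out.println(pair); // one label from g1 matched to one label from g2
    }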
Code Example #5
  /**
   * WordNet-based similarity of two sentences: parses and preprocesses both sentences,
   * disambiguates word senses with Lesk, keeps nouns and verbs, scores sense pairs with Lin
   * similarity, and normalizes the optimal assignment by the longer list (result scaled to 0..5).
   */
  public static double LexicalSimilarityScoreWordNet(
      String sentence1, String sentence2, LeskWSD tm, LexicalizedParser lp, WordNetSimilarity ws) {

    ArrayList<TaggedWord> taggedWordsPrev1 = Preprocess(StanfordParse(sentence1, lp));
    ArrayList<TaggedWord> taggedWordsPrev2 = Preprocess(StanfordParse(sentence2, lp));
    ArrayList<TaggedWord> taggedWords1 = new ArrayList<TaggedWord>();
    ArrayList<TaggedWord> taggedWords2 = new ArrayList<TaggedWord>();

    WordNetSense[] sensesPrev1 = tm.LeskJWI(sentence1);
    WordNetSense[] sensesPrev2 = tm.LeskJWI(sentence2);

    // System.out.println("Senses found!");

    ArrayList<WordNetSense> senses1 = new ArrayList<WordNetSense>();
    ArrayList<WordNetSense> senses2 = new ArrayList<WordNetSense>();

    for (int i = 0; i < taggedWordsPrev1.size(); i++) {
      String word = taggedWordsPrev1.get(i).word();
      String posTag = taggedWordsPrev1.get(i).tag();
      if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
        taggedWords1.add(new TaggedWord(word, "NN"));
        senses1.add(sensesPrev1[i]);
      } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
        taggedWords1.add(new TaggedWord(word, "VB"));
        senses1.add(sensesPrev1[i]);
      }
    }
    for (int i = 0; i < taggedWordsPrev2.size(); i++) {
      String word = taggedWordsPrev2.get(i).word();
      String posTag = taggedWordsPrev2.get(i).tag();
      if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
        taggedWords2.add(new TaggedWord(word, "NN"));
        senses2.add(sensesPrev2[i]);
      } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
        taggedWords2.add(new TaggedWord(word, "VB"));
        senses2.add(sensesPrev2[i]);
      }
    }

    // System.out.println(taggedWords1.size() + "," + taggedWords2.size());

    // array of edge weights with default weight 0
    int length1 = taggedWords1.size();
    int length2 = taggedWords2.size();
    int arrSize = Math.max(length1, length2);
    double[][] array = new double[arrSize][arrSize];
    for (int i = 0; i < arrSize; i++) {
      for (int j = 0; j < arrSize; j++) {
        array[i][j] = 0;
      }
    }
    for (int i = 0; i < length1; i++) {
      for (int j = 0; j < length2; j++) {
        String word1 = taggedWords1.get(i).word();
        String posTag1 = taggedWords1.get(i).tag();
        String word2 = taggedWords2.get(j).word();
        String posTag2 = taggedWords2.get(j).tag();
        double edgeWeight = 0;

        // LSA Similarity
        // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);

        // DISCO Similarity
        // DISCOSimilarity discoObj = new DISCOSimilarity();
        try {
          if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
          else {
            // edgeWeight = ws.wuPalmerSimilarity(senses1.get(i), senses2.get(j));
            edgeWeight = ws.linSimilarity(senses1.get(i), senses2.get(j));
          }
        } catch (Exception ex) {
          ex.printStackTrace();
        }

        array[i][j] = edgeWeight;
      }
    }

    // System.out.println("Hungarian starts " + arrSize);

    double finalScore;
    String sumType = "max";
    int minLength = Math.min(length1, length2);
    // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
    if (arrSize == 0) finalScore = 0;
    else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;

    return finalScore;
  }
Code Example #6
  /**
   * Finds the vertex mapping.
   *
   * @param g1Vertices - graph g1 vertices that need to be matched with graph g2 vertices
   * @param g2Vertices - graph g2 vertices
   * @param threshold - if the node similarity is >= this threshold, the nodes are considered
   *     matched
   * @param stemmer - stemmer for word stemming; if null, the English stemmer is used
   * @param gateways - if 0, gateways are not matched; if 1, only parents are considered; if 2,
   *     only children are considered
   * @return matching vertex pairs
   */
  public static ArrayList<VertexPair> getMappingsVetrex(
      ArrayList<Vertex> g1Vertices,
      ArrayList<Vertex> g2Vertices,
      double threshold,
      SnowballStemmer stemmer,
      int gateways) {

    ArrayList<VertexPair> solutionMappings = new ArrayList<VertexPair>();

    if (g1Vertices.size() == 0 || g2Vertices.size() == 0) {
      return solutionMappings;
    }

    if (stemmer == null) {
      stemmer = Settings.getEnglishStemmer();
    }

    ArrayList<Vertex> g1Vertices_fe = new ArrayList<Vertex>();
    ArrayList<Vertex> g2Vertices_fe = new ArrayList<Vertex>();

    for (Vertex v : g1Vertices) {
      if (!v.getType().equals(Vertex.Type.gateway)) {
        g1Vertices_fe.add(v);
      }
    }

    for (Vertex v : g2Vertices) {
      if (!v.getType().equals(Vertex.Type.gateway)) {
        g2Vertices_fe.add(v);
      }
    }

    if (g1Vertices_fe.size() > 0 && g2Vertices_fe.size() > 0) {
      int dimFunc =
          g1Vertices_fe.size() > g2Vertices_fe.size() ? g1Vertices_fe.size() : g2Vertices_fe.size();
      double costs[][] = new double[dimFunc][dimFunc];
      double costsCopy[][] = new double[dimFunc][dimFunc];
      int nrZeros = 0;

      // function mapping score
      for (int i = 0; i < g1Vertices_fe.size(); i++) {
        for (int j = 0; j < g2Vertices_fe.size(); j++) {
          double edScore = 0;
          if (g1Vertices_fe.get(i).getType().equals(g2Vertices_fe.get(j).getType())
              && g1Vertices_fe.get(i).getLabel() != null
              && g2Vertices_fe.get(j).getLabel() != null) {
            edScore =
                LabelEditDistance.edTokensWithStemming(
                    g1Vertices_fe.get(i).getLabel(),
                    g2Vertices_fe.get(j).getLabel(),
                    Settings.STRING_DELIMETER,
                    stemmer,
                    true);
          }

          if (edScore < threshold) edScore = 0;

          if (edScore == 0) {
            nrZeros++;
          }

          costs[i][j] = (-1) * edScore;
        }
      }

      if (nrZeros != g1Vertices_fe.size() * g2Vertices_fe.size()) {
        for (int i = 0; i < costs.length; i++) {
          for (int j = 0; j < costs[0].length; j++) {
            costsCopy[i][j] = costs[i][j];
          }
        }

        int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

        for (int i = 0; i < result.length; i++) {
          double pairCost = (-1) * costs[result[i][0]][result[i][1]];
          if (result[i][0] < g1Vertices_fe.size()
              && result[i][1] < g2Vertices_fe.size()
              && pairCost >= threshold
              && AssingmentProblem.canMap(
                  g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]))) {
            solutionMappings.add(
                new VertexPair(
                    g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]), pairCost));
          }
        }
      }
    }
    if (gateways > 0) {
      solutionMappings.addAll(
          getMappingsGateways(g1Vertices, g2Vertices, threshold, stemmer, gateways));
    }
    return solutionMappings;
  }
Code Example #7
  /**
   * Finds matches between gateways. The decision is based on how well the gateways'
   * parents/children match; if a parent/child is itself a gateway, the decision is made
   * recursively.
   *
   * @param g1Vertices - graph g1 vertices that need to be matched with graph g2 vertices
   * @param g2Vertices - graph g2 vertices
   * @param threshold - if the node similarity is >= this threshold, the nodes are considered
   *     matched
   * @param stemmer - stemmer for word stemming; if null, the English stemmer is used
   * @param lookParents - if 0, gateways are not matched; if 1, only parents are considered; if 2,
   *     only children are considered
   * @return matching gateway pairs
   */
  public static ArrayList<VertexPair> getMappingsGateways(
      ArrayList<Vertex> g1Vertices,
      ArrayList<Vertex> g2Vertices,
      double threshold,
      SnowballStemmer stemmer,
      int lookParents) {

    ArrayList<Vertex> g1Gateways = new ArrayList<Vertex>();
    ArrayList<Vertex> g2Gateways = new ArrayList<Vertex>();

    ArrayList<VertexPair> possibleMatches = new ArrayList<VertexPair>();

    for (Vertex v : g1Vertices) {
      if (v.getType().equals(Vertex.Type.gateway)) {
        g1Gateways.add(v);
      }
    }

    for (Vertex v : g2Vertices) {
      if (v.getType().equals(Vertex.Type.gateway)) {
        g2Gateways.add(v);
      }
    }

    if (g1Gateways.size() == 0 || g2Gateways.size() == 0) {
      return possibleMatches;
    }

    int dimFunc = g1Gateways.size() > g2Gateways.size() ? g1Gateways.size() : g2Gateways.size();

    double costs[][] = new double[dimFunc][dimFunc];
    double costsCopy[][] = new double[dimFunc][dimFunc];

    for (int i = 0; i < g1Gateways.size(); i++) {
      for (int j = 0; j < g2Gateways.size(); j++) {
        double edScore = 0;
        ArrayList<VertexPair> map;
        if (lookParents == 2) {
          map =
              getMappingsVetrex(
                  g1Gateways.get(i).getChildren(),
                  g2Gateways.get(j).getChildren(),
                  threshold,
                  stemmer,
                  lookParents);
          for (VertexPair vp : map) {
            edScore += vp.getWeight();
          }

          edScore = map.size() == 0 ? 0 : edScore / map.size();
        } else if (lookParents == 1) {
          map =
              getMappingsVetrex(
                  g1Gateways.get(i).getParents(),
                  g2Gateways.get(j).getParents(),
                  threshold,
                  stemmer,
                  lookParents);
          for (VertexPair vp : map) {
            edScore += vp.getWeight();
          }

          edScore = map.size() == 0 ? 0 : edScore / map.size();
        }

        if (edScore < threshold) edScore = 0;

        costs[i][j] = (-1) * edScore;
      }
    }

    for (int i = 0; i < costs.length; i++) {
      for (int j = 0; j < costs[0].length; j++) {
        costsCopy[i][j] = costs[i][j];
      }
    }

    int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

    for (int i = 0; i < result.length; i++) {
      double pairCost = (-1) * costs[result[i][0]][result[i][1]];
      if (result[i][0] < g1Gateways.size() && result[i][1] < g2Gateways.size() && pairCost > 0) {
        possibleMatches.add(
            new VertexPair(g1Gateways.get(result[i][0]), g2Gateways.get(result[i][1]), pairCost));
      }
    }
    return possibleMatches;
  }
Code Example #8
  /**
   * Matches the vertices of two graphs using NodeSimilarity scores, with a separate semantic
   * threshold for gateway pairs; the optimal assignment is computed on negated scores, and pairs
   * with positive similarity that pass the canMap check are returned.
   */
  public static ArrayList<VertexPair> getMappingsVetrexUsingNodeMapping(
      Graph g1, Graph g2, double threshold, double semanticThreshold) {

    ArrayList<Vertex> g1Vertices = (ArrayList<Vertex>) g1.getVertices();
    ArrayList<Vertex> g2Vertices = (ArrayList<Vertex>) g2.getVertices();

    ArrayList<VertexPair> solutionMappings = new ArrayList<VertexPair>();

    if (g1Vertices.size() == 0 || g2Vertices.size() == 0) {
      return solutionMappings;
    }

    if (g1Vertices.size() > 0 && g2Vertices.size() > 0) {
      int dimFunc = g1Vertices.size() > g2Vertices.size() ? g1Vertices.size() : g2Vertices.size();
      double costs[][] = new double[dimFunc][dimFunc];
      double costsCopy[][] = new double[dimFunc][dimFunc];
      int nrZeros = 0;

      // function mapping score
      for (int i = 0; i < g1Vertices.size(); i++) {
        for (int j = 0; j < g2Vertices.size(); j++) {
          double edScore =
              NodeSimilarity.findNodeSimilarity(g1Vertices.get(i), g2Vertices.get(j), threshold);
          if (g1Vertices.get(i).getType().equals(Type.gateway)
              && g2Vertices.get(j).getType().equals(Type.gateway)
              && edScore < semanticThreshold) {
            edScore = 0;
          } else if (!(g1Vertices.get(i).getType().equals(Type.gateway)
                  && g2Vertices.get(j).getType().equals(Type.gateway))
              && edScore < threshold) edScore = 0;

          if (edScore == 0) {
            nrZeros++;
          }
          costs[i][j] = (-1) * edScore;
        }
      }

      if (nrZeros != g1Vertices.size() * g2Vertices.size()) {
        for (int i = 0; i < costs.length; i++) {
          for (int j = 0; j < costs[0].length; j++) {
            costsCopy[i][j] = costs[i][j];
          }
        }

        int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

        for (int i = 0; i < result.length; i++) {
          double pairCost = (-1) * costs[result[i][0]][result[i][1]];
          if (result[i][0] < g1Vertices.size()
              && result[i][1] < g2Vertices.size()
              && pairCost > 0
              && AssingmentProblem.canMap(
                  g1Vertices.get(result[i][0]), g2Vertices.get(result[i][1]))) {
            solutionMappings.add(
                new VertexPair(
                    g1Vertices.get(result[i][0]), g2Vertices.get(result[i][1]), pairCost));
          }
        }
      }
    }
    return solutionMappings;
  }