Ejemplo n.º 1
0
  /**
   * Pairs labels from the first list with labels from the second list by solving an assignment
   * problem over token-based, stemmed label scores.
   *
   * <p>A square cost matrix of size {@code max(g1Labels.size(), g2Labels.size())} is built. Cells
   * that do not correspond to a real label on both sides are padding and keep the default value
   * 0. Assignments that land on padding are dropped, so lists of different lengths never cause an
   * out-of-range lookup.
   *
   * @param g1Labels first list of labels
   * @param g2Labels second list of labels
   * @param threshold scores strictly below this value are replaced by 1 before being used as cost
   * @return one pair per assignment whose two indices both refer to real labels; an empty list
   *     when either input list is empty
   */
  static ArrayList<StringPair> getMappingsLabels(
      ArrayList<String> g1Labels, ArrayList<String> g2Labels, double threshold) {

    ArrayList<StringPair> solutionMappings = new ArrayList<StringPair>();
    int g1Count = g1Labels.size();
    int g2Count = g2Labels.size();

    // Nothing to match: return before fetching the stemmer or allocating the matrix.
    if (g1Count == 0 || g2Count == 0) {
      return solutionMappings;
    }

    SnowballStemmer englishStemmer = Settings.getStemmer("english");
    int dimFunc = Math.max(g1Count, g2Count);
    double[][] costs = new double[dimFunc][dimFunc];

    // Fill the real (non-padding) part of the matrix with the label scores.
    for (int i = 0; i < g1Count; i++) {
      for (int j = 0; j < g2Count; j++) {
        double edScore =
            LabelEditDistance.edTokensWithStemming(
                g1Labels.get(i), g2Labels.get(j), Settings.STRING_DELIMETER, englishStemmer, true);

        // NOTE(review): scores below the threshold are raised to 1 and the score is used directly
        // as the cost to minimise. Confirm this matches the meaning of edTokensWithStemming
        // (distance vs. similarity); the behaviour is kept as it was.
        if (edScore < threshold) {
          edScore = 1;
        }

        costs[i][j] = edScore;
      }
    }

    // The matrix is not read again after this call, so no defensive copy is needed.
    int[][] result = HungarianAlgorithm.computeAssignments(costs);

    for (int[] assignment : result) {
      int g1Index = assignment[0];
      int g2Index = assignment[1];
      // Skip assignments to padding rows/columns; they have no label on one side.
      if (g1Index < g1Count && g2Index < g2Count) {
        solutionMappings.add(new StringPair(g1Labels.get(g1Index), g2Labels.get(g2Index)));
      }
    }

    return solutionMappings;
  }
Ejemplo n.º 2
0
  /**
   * Computes a mapping between the vertices of two graphs based on label similarity.
   *
   * <p>Vertices of type gateway are left out of the label-based assignment. They are only
   * considered through {@code getMappingsGateways}, and only when {@code gateways > 0}.
   *
   * @param g1Vertices vertices of graph g1 that are to be matched against the vertices of graph g2
   * @param g2Vertices vertices of graph g2
   * @param threshold minimum similarity; a pair is kept only if its score is {@code >= threshold}
   * @param stemmer stemmer used for word stemming; when {@code null} the English stemmer obtained
   *     from {@code Settings} is used
   * @param gateways when not positive, gateways are not matched; a positive value is forwarded to
   *     {@code getMappingsGateways} (1 = only parents are looked at, 2 = only children)
   * @return the matched vertex pairs; an empty list when either input list is empty
   */
  public static ArrayList<VertexPair> getMappingsVetrex(
      ArrayList<Vertex> g1Vertices,
      ArrayList<Vertex> g2Vertices,
      double threshold,
      SnowballStemmer stemmer,
      int gateways) {

    ArrayList<VertexPair> matches = new ArrayList<VertexPair>();
    if (g1Vertices.size() == 0 || g2Vertices.size() == 0) {
      return matches;
    }

    SnowballStemmer activeStemmer = (stemmer != null) ? stemmer : Settings.getEnglishStemmer();

    // Keep only the non-gateway vertices of each graph for the label-based assignment.
    ArrayList<Vertex> left = new ArrayList<Vertex>();
    for (Vertex candidate : g1Vertices) {
      if (candidate.getType().equals(Vertex.Type.gateway)) {
        continue;
      }
      left.add(candidate);
    }

    ArrayList<Vertex> right = new ArrayList<Vertex>();
    for (Vertex candidate : g2Vertices) {
      if (candidate.getType().equals(Vertex.Type.gateway)) {
        continue;
      }
      right.add(candidate);
    }

    int leftCount = left.size();
    int rightCount = right.size();

    if (leftCount > 0 && rightCount > 0) {
      // Square matrix; rows/columns beyond the real vertex counts are padding and stay at 0.
      int dim = Math.max(leftCount, rightCount);
      double[][] costs = new double[dim][dim];
      boolean anyScoreKept = false;

      for (int row = 0; row < leftCount; row++) {
        Vertex source = left.get(row);
        for (int col = 0; col < rightCount; col++) {
          Vertex target = right.get(col);

          // Only vertices of the same type that both carry a label are compared.
          double similarity = 0;
          boolean comparable =
              source.getType().equals(target.getType())
                  && source.getLabel() != null
                  && target.getLabel() != null;
          if (comparable) {
            similarity =
                LabelEditDistance.edTokensWithStemming(
                    source.getLabel(),
                    target.getLabel(),
                    Settings.STRING_DELIMETER,
                    activeStemmer,
                    true);
          }

          if (similarity < threshold) {
            similarity = 0;
          }
          if (similarity != 0) {
            anyScoreKept = true;
          }

          // The solver receives costs, so the similarity is negated.
          costs[row][col] = -similarity;
        }
      }

      // Run the solver only if at least one pair has a non-zero score.
      if (anyScoreKept) {
        // The solver gets its own copy; costs is read again below to recover the pair scores.
        double[][] solverInput = new double[dim][];
        for (int row = 0; row < dim; row++) {
          solverInput[row] = costs[row].clone();
        }

        int[][] assignments = HungarianAlgorithm.computeAssignments(solverInput);

        for (int[] assignment : assignments) {
          int row = assignment[0];
          int col = assignment[1];
          double similarity = -costs[row][col];

          // Drop assignments that touch padding or do not reach the threshold.
          if (row >= leftCount || col >= rightCount || !(similarity >= threshold)) {
            continue;
          }

          Vertex source = left.get(row);
          Vertex target = right.get(col);
          if (AssingmentProblem.canMap(source, target)) {
            matches.add(new VertexPair(source, target, similarity));
          }
        }
      }
    }

    if (gateways > 0) {
      matches.addAll(
          getMappingsGateways(g1Vertices, g2Vertices, threshold, activeStemmer, gateways));
    }
    return matches;
  }