static ArrayList<StringPair> getMappingsLabels( ArrayList<String> g1Labels, ArrayList<String> g2Labels, double threshold) { SnowballStemmer englishStemmer = Settings.getStemmer("english"); int dimFunc = g1Labels.size() > g2Labels.size() ? g1Labels.size() : g2Labels.size(); double costs[][] = new double[dimFunc][dimFunc]; double costsCopy[][] = new double[dimFunc][dimFunc]; ArrayList<StringPair> solutionMappings = new ArrayList<StringPair>(); if (g1Labels.size() == 0 || g2Labels.size() == 0) { return solutionMappings; } // function mapping score for (int i = 0; i < g1Labels.size(); i++) { for (int j = 0; j < g2Labels.size(); j++) { double edScore; edScore = LabelEditDistance.edTokensWithStemming( g1Labels.get(i), g2Labels.get(j), Settings.STRING_DELIMETER, englishStemmer, true); if (edScore < threshold) edScore = 1; costs[i][j] = edScore; } } for (int i = 0; i < costs.length; i++) { for (int j = 0; j < costs[0].length; j++) { costsCopy[i][j] = costs[i][j]; } } int[][] result = HungarianAlgorithm.computeAssignments(costsCopy); for (int i = 0; i < result.length; i++) { solutionMappings.add(new StringPair(g1Labels.get(result[i][0]), g2Labels.get(result[i][1]))); } return solutionMappings; }
/** * Finds the vertex mapping * * @param g1Vertices - graph g1 vertices that need to be matched with graph g1 vertices * @param g2Vertices - graph g2 vertices * @param threshold - if node similarity is >= than threshold then these nodes are considered to * be matched. * @param stemmer - stemmer for wrord stemming, if == null, then english stemmer is used * @param gateways - if == 0, then gateways are not matched, if == 1, then only parent are looked, * if == 2, then only children are looked * @return matching vertex pairs */ public static ArrayList<VertexPair> getMappingsVetrex( ArrayList<Vertex> g1Vertices, ArrayList<Vertex> g2Vertices, double threshold, SnowballStemmer stemmer, int gateways) { ArrayList<VertexPair> solutionMappings = new ArrayList<VertexPair>(); if (g1Vertices.size() == 0 || g2Vertices.size() == 0) { return solutionMappings; } if (stemmer == null) { stemmer = Settings.getEnglishStemmer(); } ArrayList<Vertex> g1Vertices_fe = new ArrayList<Vertex>(); ArrayList<Vertex> g2Vertices_fe = new ArrayList<Vertex>(); for (Vertex v : g1Vertices) { if (!v.getType().equals(Vertex.Type.gateway)) { g1Vertices_fe.add(v); } } for (Vertex v : g2Vertices) { if (!v.getType().equals(Vertex.Type.gateway)) { g2Vertices_fe.add(v); } } if (g1Vertices_fe.size() > 0 && g2Vertices_fe.size() > 0) { int dimFunc = g1Vertices_fe.size() > g2Vertices_fe.size() ? g1Vertices_fe.size() : g2Vertices_fe.size(); double costs[][] = new double[dimFunc][dimFunc]; double costsCopy[][] = new double[dimFunc][dimFunc]; int nrZeros = 0; // function mapping score for (int i = 0; i < g1Vertices_fe.size(); i++) { for (int j = 0; j < g2Vertices_fe.size(); j++) { double edScore = 0; if (g1Vertices_fe.get(i).getType().equals(g2Vertices_fe.get(j).getType()) && g1Vertices_fe.get(i).getLabel() != null && g2Vertices_fe.get(j).getLabel() != null) { edScore = LabelEditDistance.edTokensWithStemming( g1Vertices_fe.get(i).getLabel(), g2Vertices_fe.get(j).getLabel(), Settings.STRING_DELIMETER, stemmer, true); } if (edScore < threshold) edScore = 0; if (edScore == 0) { nrZeros++; } costs[i][j] = (-1) * edScore; } } if (nrZeros != g1Vertices_fe.size() * g2Vertices_fe.size()) { for (int i = 0; i < costs.length; i++) { for (int j = 0; j < costs[0].length; j++) { costsCopy[i][j] = costs[i][j]; } } int[][] result = HungarianAlgorithm.computeAssignments(costsCopy); for (int i = 0; i < result.length; i++) { double pairCost = (-1) * costs[result[i][0]][result[i][1]]; if (result[i][0] < g1Vertices_fe.size() && result[i][1] < g2Vertices_fe.size() && pairCost >= threshold && AssingmentProblem.canMap( g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]))) { solutionMappings.add( new VertexPair( g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]), pairCost)); } } } } if (gateways > 0) { solutionMappings.addAll( getMappingsGateways(g1Vertices, g2Vertices, threshold, stemmer, gateways)); } return solutionMappings; }