public static double LexicalSimilarityScoreMax(
    ArrayList<TaggedWord> taggedWords1,
    ArrayList<TaggedWord> taggedWords2,
    DISCOSimilarity discoRAM,
    LexicalizedParser lp) {

  // Edge-weight matrix, padded to square for the Hungarian algorithm;
  // unmatched cells keep the default weight 0.
  int length1 = taggedWords1.size();
  int length2 = taggedWords2.size();
  int arrSize = Math.max(length1, length2);
  double[][] array = new double[arrSize][arrSize];
  for (int i = 0; i < arrSize; i++) {
    for (int j = 0; j < arrSize; j++) {
      array[i][j] = 0;
    }
  }

  for (int i = 0; i < length1; i++) {
    for (int j = 0; j < length2; j++) {
      String word1 = taggedWords1.get(i).word();
      String posTag1 = taggedWords1.get(i).tag();
      String word2 = taggedWords2.get(j).word();
      String posTag2 = taggedWords2.get(j).tag();

      // Proper nouns (tags starting with "NNP") are kept whole; any other
      // multi-word token is split on spaces, one TaggedWord per word.
      ArrayList<TaggedWord> newList1 = new ArrayList<TaggedWord>();
      if (posTag1.length() >= 3 && posTag1.substring(0, 3).equals("NNP")) {
        newList1.add(taggedWords1.get(i));
      } else {
        String[] words = word1.split(" ");
        for (int k = 0; k < words.length; k++) newList1.add(new TaggedWord(words[k], posTag1));
      }

      ArrayList<TaggedWord> newList2 = new ArrayList<TaggedWord>();
      if (posTag2.length() >= 3 && posTag2.substring(0, 3).equals("NNP")) {
        newList2.add(taggedWords2.get(j));
      } else {
        String[] words = word2.split(" ");
        for (int k = 0; k < words.length; k++) newList2.add(new TaggedWord(words[k], posTag2));
      }

      double edgeWeight = LexicalSimilarityScoreMin(newList1, newList2, discoRAM, lp);
      array[i][j] = edgeWeight;
    }
  }

  double finalScore;
  String sumType = "max";
  // Normalize the maximum-weight matching by the larger token count ("Max")
  // and rescale to a 0..5 range. The minLength variant is kept for reference:
  // int minLength = Math.min(length1, length2);
  // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
  finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;
  return finalScore;
}
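// A minimal sketch of the token-splitting rule used above, on an invented
// TaggedWord: proper nouns (tags starting with "NNP") stay one unit, while any
// other multi-word token is split on spaces into one TaggedWord per word.
private static ArrayList<TaggedWord> splitTokenDemo() {
  TaggedWord tw = new TaggedWord("ticket counter", "NN");
  ArrayList<TaggedWord> expanded = new ArrayList<TaggedWord>();
  if (tw.tag().length() >= 3 && tw.tag().substring(0, 3).equals("NNP")) {
    expanded.add(tw); // a proper noun like "New York"/NNP would stay whole
  } else {
    for (String w : tw.word().split(" ")) {
      expanded.add(new TaggedWord(w, tw.tag())); // "ticket"/NN, "counter"/NN
    }
  }
  return expanded;
}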
public static double LexicalSimilarityScoreMin(
    ArrayList<TaggedWord> taggedWords1,
    ArrayList<TaggedWord> taggedWords2,
    DISCOSimilarity discoRAM,
    LexicalizedParser lp) {

  // Edge-weight matrix, padded to square for the Hungarian algorithm;
  // unmatched cells keep the default weight 0.
  int length1 = taggedWords1.size();
  int length2 = taggedWords2.size();
  int arrSize = Math.max(length1, length2);
  double[][] array = new double[arrSize][arrSize];
  for (int i = 0; i < arrSize; i++) {
    for (int j = 0; j < arrSize; j++) {
      array[i][j] = 0;
    }
  }

  for (int i = 0; i < length1; i++) {
    for (int j = 0; j < length2; j++) {
      String word1 = taggedWords1.get(i).word();
      String word2 = taggedWords2.get(j).word();

      double edgeWeight = 0;
      // Identical words (case-insensitive) get full weight; otherwise the
      // weight is the DISCO distributional similarity of the pair. An
      // LSA-based alternative is kept for reference:
      // edgeWeight = LSASimilarity.LSAWordSimilarity(word1, word2);
      try {
        if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
        else {
          edgeWeight = discoRAM.similarity2(word1, word2);
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }

      array[i][j] = edgeWeight;
    }
  }

  double finalScore;
  String sumType = "max";
  // Normalize by the smaller token count ("Min") and rescale to 0..5.
  int minLength = Math.min(length1, length2);
  finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / minLength * 5;
  // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/arrSize * 5;
  return finalScore;
}
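// A minimal sketch of the scoring scheme shared by the two methods above, on a
// hand-built 2x2 weight matrix (the words and weights are invented). It assumes
// the same HungarianAlgorithm class used above, where hgAlgorithm(array, "max")
// is taken to return the total weight of a maximum-weight assignment.
private static double hungarianScoreDemo() {
  // Rows = words of sentence 1, columns = words of sentence 2;
  // each cell holds a word-to-word similarity in [0, 1].
  double[][] array = {
    {1.0, 0.2}, // e.g. "dog" vs {"dog", "barks"}
    {0.1, 0.7}  // e.g. "howls" vs {"dog", "barks"}
  };
  double matchingWeight = HungarianAlgorithm.hgAlgorithm(array, "max"); // 1.0 + 0.7
  // Normalize by the matrix dimension and rescale to 0..5, as in
  // LexicalSimilarityScoreMax: (1.7 / 2) * 5 = 4.25.
  return matchingWeight / array.length * 5;
}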
public static double LexicalSimilarity2Level(
    String sentence1, String sentence2, DISCOSimilarity discoRAM, LexicalizedParser lp) {
  Tree parse1 = lp.apply(sentence1);
  Tree parse2 = lp.apply(sentence2);

  int phraseSizeLimit = 2;
  ArrayList<ArrayList<TaggedWord>> phrasesList1 = getPhrases(parse1, phraseSizeLimit);
  ArrayList<ArrayList<TaggedWord>> phrasesList2 = getPhrases(parse2, phraseSizeLimit);

  // Edge-weight matrix over phrases, padded to square; unmatched cells keep
  // the default weight 0.
  int length1 = phrasesList1.size();
  int length2 = phrasesList2.size();
  int arrSize = Math.max(length1, length2);
  double[][] array = new double[arrSize][arrSize];
  for (int i = 0; i < arrSize; i++) {
    for (int j = 0; j < arrSize; j++) {
      array[i][j] = 0;
    }
  }

  for (int i = 0; i < length1; i++) {
    for (int j = 0; j < length2; j++) {
      ArrayList<TaggedWord> taggedWords1 = phrasesList1.get(i);
      ArrayList<TaggedWord> taggedWords2 = phrasesList2.get(j);
      // edgeWeight = LexicalSimilarityScore(taggedWords1, taggedWords2, discoRAM, lp)/5.0;
      double edgeWeight = BestWordMatchEdgeWeight(taggedWords1, taggedWords2, discoRAM);
      array[i][j] = edgeWeight;
    }
  }

  double finalScore;
  String sumType = "max";
  // Guard against empty parses; otherwise normalize the matching weight by
  // the larger phrase count and rescale to 0..5.
  // int minLength = Math.min(length1, length2);
  // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
  if (arrSize == 0) finalScore = 0;
  else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;
  return finalScore;
}
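// A hedged usage sketch for LexicalSimilarity2Level. The sentences are
// invented; the DISCOSimilarity and LexicalizedParser instances are taken as
// parameters because their construction (word-space directory, parser model)
// is configured elsewhere in this project.
private static void twoLevelDemo(DISCOSimilarity discoRAM, LexicalizedParser lp) {
  String s1 = "The committee approved the annual budget.";
  String s2 = "The board accepted the yearly financial plan.";
  // Phrases of up to two words are extracted from each parse tree and matched
  // one-to-one; the result is on a 0..5 scale.
  double score = LexicalSimilarity2Level(s1, s2, discoRAM, lp);
  System.out.println("two-level similarity = " + score);
}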
static ArrayList<StringPair> getMappingsLabels(
    ArrayList<String> g1Labels, ArrayList<String> g2Labels, double threshold) {
  SnowballStemmer englishStemmer = Settings.getStemmer("english");

  int dimFunc = g1Labels.size() > g2Labels.size() ? g1Labels.size() : g2Labels.size();
  double costs[][] = new double[dimFunc][dimFunc];
  double costsCopy[][] = new double[dimFunc][dimFunc];

  ArrayList<StringPair> solutionMappings = new ArrayList<StringPair>();
  if (g1Labels.size() == 0 || g2Labels.size() == 0) {
    return solutionMappings;
  }

  // function mapping score; any score below the threshold is replaced with 1
  for (int i = 0; i < g1Labels.size(); i++) {
    for (int j = 0; j < g2Labels.size(); j++) {
      double edScore =
          LabelEditDistance.edTokensWithStemming(
              g1Labels.get(i),
              g2Labels.get(j),
              Settings.STRING_DELIMETER,
              englishStemmer,
              true);
      if (edScore < threshold) edScore = 1;
      costs[i][j] = edScore;
    }
  }

  // Solve the assignment on a copy of the cost matrix.
  for (int i = 0; i < costs.length; i++) {
    for (int j = 0; j < costs[0].length; j++) {
      costsCopy[i][j] = costs[i][j];
    }
  }
  int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

  for (int i = 0; i < result.length; i++) {
    solutionMappings.add(
        new StringPair(g1Labels.get(result[i][0]), g2Labels.get(result[i][1])));
  }
  return solutionMappings;
}
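// A hedged usage sketch for getMappingsLabels. The labels and the 0.5
// threshold are invented for illustration; java.util.Arrays is assumed to be
// imported.
private static void labelMappingDemo() {
  ArrayList<String> g1Labels =
      new ArrayList<String>(Arrays.asList("approve order", "ship goods"));
  ArrayList<String> g2Labels =
      new ArrayList<String>(Arrays.asList("order approval", "goods shipment"));
  ArrayList<StringPair> mapping = getMappingsLabels(g1Labels, g2Labels, 0.5);
  // One StringPair per assigned label pair.
  System.out.println(mapping.size() + " label pairs");
}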
public static double LexicalSimilarityScoreWordNet(
    String sentence1, String sentence2, LeskWSD tm, LexicalizedParser lp, WordNetSimilarity ws) {
  ArrayList<TaggedWord> taggedWordsPrev1 = Preprocess(StanfordParse(sentence1, lp));
  ArrayList<TaggedWord> taggedWordsPrev2 = Preprocess(StanfordParse(sentence2, lp));
  ArrayList<TaggedWord> taggedWords1 = new ArrayList<TaggedWord>();
  ArrayList<TaggedWord> taggedWords2 = new ArrayList<TaggedWord>();

  // Word-sense disambiguation via Lesk; the sense lists are kept aligned with
  // the retained tokens below.
  WordNetSense[] sensesPrev1 = tm.LeskJWI(sentence1);
  WordNetSense[] sensesPrev2 = tm.LeskJWI(sentence2);
  ArrayList<WordNetSense> senses1 = new ArrayList<WordNetSense>();
  ArrayList<WordNetSense> senses2 = new ArrayList<WordNetSense>();

  // Keep only nouns (NN*) and verbs (VB*), collapsing their tags to NN/VB.
  for (int i = 0; i < taggedWordsPrev1.size(); i++) {
    String word = taggedWordsPrev1.get(i).word();
    String posTag = taggedWordsPrev1.get(i).tag();
    if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
      taggedWords1.add(new TaggedWord(word, "NN"));
      senses1.add(sensesPrev1[i]);
    } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
      taggedWords1.add(new TaggedWord(word, "VB"));
      senses1.add(sensesPrev1[i]);
    }
  }
  for (int i = 0; i < taggedWordsPrev2.size(); i++) {
    String word = taggedWordsPrev2.get(i).word();
    String posTag = taggedWordsPrev2.get(i).tag();
    if (posTag.length() >= 2 && posTag.substring(0, 2).equals("NN")) {
      taggedWords2.add(new TaggedWord(word, "NN"));
      senses2.add(sensesPrev2[i]);
    } else if (posTag.length() >= 2 && posTag.substring(0, 2).equals("VB")) {
      taggedWords2.add(new TaggedWord(word, "VB"));
      senses2.add(sensesPrev2[i]);
    }
  }

  // Edge-weight matrix, padded to square; unmatched cells keep weight 0.
  int length1 = taggedWords1.size();
  int length2 = taggedWords2.size();
  int arrSize = Math.max(length1, length2);
  double[][] array = new double[arrSize][arrSize];
  for (int i = 0; i < arrSize; i++) {
    for (int j = 0; j < arrSize; j++) {
      array[i][j] = 0;
    }
  }

  for (int i = 0; i < length1; i++) {
    for (int j = 0; j < length2; j++) {
      String word1 = taggedWords1.get(i).word();
      String word2 = taggedWords2.get(j).word();

      double edgeWeight = 0;
      // Identical words (case-insensitive) get full weight; otherwise use Lin
      // similarity between the disambiguated senses. A Wu-Palmer alternative
      // is kept for reference:
      // edgeWeight = ws.wuPalmerSimilarity(senses1.get(i), senses2.get(j));
      try {
        if (word1.compareToIgnoreCase(word2) == 0) edgeWeight = 1;
        else {
          edgeWeight = ws.linSimilarity(senses1.get(i), senses2.get(j));
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }

      array[i][j] = edgeWeight;
    }
  }

  double finalScore;
  String sumType = "max";
  // Guard against empty token lists; otherwise normalize by the larger token
  // count and rescale to 0..5. The minLength variant is kept for reference:
  // int minLength = Math.min(length1, length2);
  // finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType)/minLength * 5;
  if (arrSize == 0) finalScore = 0;
  else finalScore = HungarianAlgorithm.hgAlgorithm(array, sumType) / arrSize * 5;
  return finalScore;
}
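// A hedged usage sketch for LexicalSimilarityScoreWordNet. The LeskWSD,
// LexicalizedParser and WordNetSimilarity instances are taken as parameters
// because their setup (WordNet dictionary, parser model) is project-specific;
// the sentences are invented.
private static void wordNetScoreDemo(LeskWSD tm, LexicalizedParser lp, WordNetSimilarity ws) {
  // Only nouns and verbs survive the filtering step, so the score reflects
  // sense-level (Lin) similarity between content words, on a 0..5 scale.
  double score =
      LexicalSimilarityScoreWordNet(
          "The company releases a new product.",
          "The firm launches a novel item.",
          tm, lp, ws);
  System.out.println("WordNet-based similarity = " + score);
}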
/**
 * Finds the vertex mapping.
 *
 * @param g1Vertices - graph g1 vertices that need to be matched with graph g2 vertices
 * @param g2Vertices - graph g2 vertices
 * @param threshold - if the node similarity is >= this threshold, the nodes are considered
 *     matched
 * @param stemmer - stemmer for word stemming; if null, the English stemmer is used
 * @param gateways - if 0, gateways are not matched; if 1, only parents are considered; if 2,
 *     only children are considered
 * @return matching vertex pairs
 */
public static ArrayList<VertexPair> getMappingsVetrex(
    ArrayList<Vertex> g1Vertices,
    ArrayList<Vertex> g2Vertices,
    double threshold,
    SnowballStemmer stemmer,
    int gateways) {

  ArrayList<VertexPair> solutionMappings = new ArrayList<VertexPair>();

  if (g1Vertices.size() == 0 || g2Vertices.size() == 0) {
    return solutionMappings;
  }

  if (stemmer == null) {
    stemmer = Settings.getEnglishStemmer();
  }

  // Match non-gateway vertices first; gateways are handled separately below.
  ArrayList<Vertex> g1Vertices_fe = new ArrayList<Vertex>();
  ArrayList<Vertex> g2Vertices_fe = new ArrayList<Vertex>();
  for (Vertex v : g1Vertices) {
    if (!v.getType().equals(Vertex.Type.gateway)) {
      g1Vertices_fe.add(v);
    }
  }
  for (Vertex v : g2Vertices) {
    if (!v.getType().equals(Vertex.Type.gateway)) {
      g2Vertices_fe.add(v);
    }
  }

  if (g1Vertices_fe.size() > 0 && g2Vertices_fe.size() > 0) {
    int dimFunc =
        g1Vertices_fe.size() > g2Vertices_fe.size() ? g1Vertices_fe.size() : g2Vertices_fe.size();
    double costs[][] = new double[dimFunc][dimFunc];
    double costsCopy[][] = new double[dimFunc][dimFunc];
    int nrZeros = 0;

    // Function mapping score: similarities below the threshold are zeroed;
    // scores are negated because the solver minimizes total cost.
    for (int i = 0; i < g1Vertices_fe.size(); i++) {
      for (int j = 0; j < g2Vertices_fe.size(); j++) {
        double edScore = 0;
        if (g1Vertices_fe.get(i).getType().equals(g2Vertices_fe.get(j).getType())
            && g1Vertices_fe.get(i).getLabel() != null
            && g2Vertices_fe.get(j).getLabel() != null) {
          edScore =
              LabelEditDistance.edTokensWithStemming(
                  g1Vertices_fe.get(i).getLabel(),
                  g2Vertices_fe.get(j).getLabel(),
                  Settings.STRING_DELIMETER,
                  stemmer,
                  true);
        }
        if (edScore < threshold) edScore = 0;
        if (edScore == 0) {
          nrZeros++;
        }
        costs[i][j] = (-1) * edScore;
      }
    }

    // Skip the assignment entirely if no pair passed the threshold.
    if (nrZeros != g1Vertices_fe.size() * g2Vertices_fe.size()) {
      for (int i = 0; i < costs.length; i++) {
        for (int j = 0; j < costs[0].length; j++) {
          costsCopy[i][j] = costs[i][j];
        }
      }
      int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

      for (int i = 0; i < result.length; i++) {
        double pairCost = (-1) * costs[result[i][0]][result[i][1]];
        if (result[i][0] < g1Vertices_fe.size()
            && result[i][1] < g2Vertices_fe.size()
            && pairCost >= threshold
            && AssingmentProblem.canMap(
                g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]))) {
          solutionMappings.add(
              new VertexPair(
                  g1Vertices_fe.get(result[i][0]), g2Vertices_fe.get(result[i][1]), pairCost));
        }
      }
    }
  }

  if (gateways > 0) {
    solutionMappings.addAll(
        getMappingsGateways(g1Vertices, g2Vertices, threshold, stemmer, gateways));
  }
  return solutionMappings;
}
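// A minimal sketch of the cost-negation pattern used above and below: the
// assignment solver minimizes total cost, so similarities are stored negated
// and negated back when the solution is read. The 2x2 scores are invented; the
// solver is assumed to be the same HungarianAlgorithm.computeAssignments used
// above, which may modify its argument (hence the copy).
private static void negatedCostsDemo() {
  double[][] sim = {{0.9, 0.1}, {0.2, 0.8}};
  int n = sim.length;
  double[][] costs = new double[n][n];
  double[][] costsCopy = new double[n][n];
  for (int i = 0; i < n; i++) {
    for (int j = 0; j < n; j++) {
      costs[i][j] = (-1) * sim[i][j]; // minimizing -sim maximizes sim
      costsCopy[i][j] = costs[i][j];
    }
  }
  int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);
  for (int i = 0; i < result.length; i++) {
    // Read the weight back from the untouched matrix and undo the negation.
    double pairWeight = (-1) * costs[result[i][0]][result[i][1]];
    System.out.println(result[i][0] + " -> " + result[i][1] + " : " + pairWeight);
  }
}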
/**
 * Finds the match between gateways. The decision is based on how well the gateways'
 * parents/children match; if a parent/child is itself a gateway, the decision is made
 * recursively.
 *
 * @param g1Vertices - graph g1 vertices that need to be matched with graph g2 vertices
 * @param g2Vertices - graph g2 vertices
 * @param threshold - if the node similarity is >= this threshold, the nodes are considered
 *     matched
 * @param stemmer - stemmer for word stemming; if null, the English stemmer is used
 * @param lookParents - if 1, only parents are considered; if 2, only children are considered
 * @return matching gateway pairs
 */
public static ArrayList<VertexPair> getMappingsGateways(
    ArrayList<Vertex> g1Vertices,
    ArrayList<Vertex> g2Vertices,
    double threshold,
    SnowballStemmer stemmer,
    int lookParents) {

  ArrayList<Vertex> g1Gateways = new ArrayList<Vertex>();
  ArrayList<Vertex> g2Gateways = new ArrayList<Vertex>();
  ArrayList<VertexPair> possibleMatches = new ArrayList<VertexPair>();

  for (Vertex v : g1Vertices) {
    if (v.getType().equals(Vertex.Type.gateway)) {
      g1Gateways.add(v);
    }
  }
  for (Vertex v : g2Vertices) {
    if (v.getType().equals(Vertex.Type.gateway)) {
      g2Gateways.add(v);
    }
  }

  if (g1Gateways.size() == 0 || g2Gateways.size() == 0) {
    return possibleMatches;
  }

  int dimFunc = g1Gateways.size() > g2Gateways.size() ? g1Gateways.size() : g2Gateways.size();
  double costs[][] = new double[dimFunc][dimFunc];
  double costsCopy[][] = new double[dimFunc][dimFunc];

  // A gateway pair scores the average weight of the recursive vertex mapping
  // between its children (lookParents == 2) or parents (lookParents == 1).
  for (int i = 0; i < g1Gateways.size(); i++) {
    for (int j = 0; j < g2Gateways.size(); j++) {
      double edScore = 0;
      ArrayList<VertexPair> map;
      if (lookParents == 2) {
        map =
            getMappingsVetrex(
                g1Gateways.get(i).getChildren(),
                g2Gateways.get(j).getChildren(),
                threshold,
                stemmer,
                lookParents);
        for (VertexPair vp : map) {
          edScore += vp.getWeight();
        }
        edScore = map.size() == 0 ? 0 : edScore / map.size();
      } else if (lookParents == 1) {
        map =
            getMappingsVetrex(
                g1Gateways.get(i).getParents(),
                g2Gateways.get(j).getParents(),
                threshold,
                stemmer,
                lookParents);
        for (VertexPair vp : map) {
          edScore += vp.getWeight();
        }
        edScore = map.size() == 0 ? 0 : edScore / map.size();
      }

      if (edScore < threshold) edScore = 0;
      costs[i][j] = (-1) * edScore;
    }
  }

  for (int i = 0; i < costs.length; i++) {
    for (int j = 0; j < costs[0].length; j++) {
      costsCopy[i][j] = costs[i][j];
    }
  }
  int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

  for (int i = 0; i < result.length; i++) {
    double pairCost = (-1) * costs[result[i][0]][result[i][1]];
    if (result[i][0] < g1Gateways.size() && result[i][1] < g2Gateways.size() && pairCost > 0) {
      possibleMatches.add(
          new VertexPair(g1Gateways.get(result[i][0]), g2Gateways.get(result[i][1]), pairCost));
    }
  }
  return possibleMatches;
}
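// A hedged usage sketch for getMappingsGateways: score gateway pairs by how
// well their parents match (lookParents == 1), letting the null stemmer fall
// back to the English stemmer inside the recursive vertex matching. The vertex
// lists and the 0.6 threshold are illustrative assumptions.
private static void gatewayMatchDemo(
    ArrayList<Vertex> g1Vertices, ArrayList<Vertex> g2Vertices) {
  ArrayList<VertexPair> gwPairs =
      getMappingsGateways(g1Vertices, g2Vertices, 0.6, null, 1);
  for (VertexPair vp : gwPairs) {
    System.out.println("gateway pair weight = " + vp.getWeight());
  }
}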
public static ArrayList<VertexPair> getMappingsVetrexUsingNodeMapping(
    Graph g1, Graph g2, double threshold, double semanticThreshold) {
  ArrayList<Vertex> g1Vertices = (ArrayList<Vertex>) g1.getVertices();
  ArrayList<Vertex> g2Vertices = (ArrayList<Vertex>) g2.getVertices();

  ArrayList<VertexPair> solutionMappings = new ArrayList<VertexPair>();

  if (g1Vertices.size() == 0 || g2Vertices.size() == 0) {
    return solutionMappings;
  }

  if (g1Vertices.size() > 0 && g2Vertices.size() > 0) {
    int dimFunc = g1Vertices.size() > g2Vertices.size() ? g1Vertices.size() : g2Vertices.size();
    double costs[][] = new double[dimFunc][dimFunc];
    double costsCopy[][] = new double[dimFunc][dimFunc];
    int nrZeros = 0;

    // Node mapping score: gateway-gateway pairs are cut at the semantic
    // threshold, all other pairs at the regular threshold; scores are negated
    // because the solver minimizes total cost.
    for (int i = 0; i < g1Vertices.size(); i++) {
      for (int j = 0; j < g2Vertices.size(); j++) {
        double edScore =
            NodeSimilarity.findNodeSimilarity(g1Vertices.get(i), g2Vertices.get(j), threshold);
        if (g1Vertices.get(i).getType().equals(Type.gateway)
            && g2Vertices.get(j).getType().equals(Type.gateway)
            && edScore < semanticThreshold) {
          edScore = 0;
        } else if (!(g1Vertices.get(i).getType().equals(Type.gateway)
                && g2Vertices.get(j).getType().equals(Type.gateway))
            && edScore < threshold) {
          edScore = 0;
        }

        if (edScore == 0) {
          nrZeros++;
        }
        costs[i][j] = (-1) * edScore;
      }
    }

    // Skip the assignment entirely if no pair passed its threshold.
    if (nrZeros != g1Vertices.size() * g2Vertices.size()) {
      for (int i = 0; i < costs.length; i++) {
        for (int j = 0; j < costs[0].length; j++) {
          costsCopy[i][j] = costs[i][j];
        }
      }
      int[][] result = HungarianAlgorithm.computeAssignments(costsCopy);

      for (int i = 0; i < result.length; i++) {
        double pairCost = (-1) * costs[result[i][0]][result[i][1]];
        if (result[i][0] < g1Vertices.size()
            && result[i][1] < g2Vertices.size()
            && pairCost > 0
            && AssingmentProblem.canMap(
                g1Vertices.get(result[i][0]), g2Vertices.get(result[i][1]))) {
          solutionMappings.add(
              new VertexPair(
                  g1Vertices.get(result[i][0]), g2Vertices.get(result[i][1]), pairCost));
        }
      }
    }
  }
  return solutionMappings;
}
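// A hedged usage sketch for getMappingsVetrexUsingNodeMapping: compare two
// process graphs with one threshold for non-gateway pairs and a separate
// semantic threshold for gateway-gateway pairs. The Graph instances and the
// 0.6 / 0.75 threshold values are illustrative assumptions.
private static void graphMappingDemo(Graph g1, Graph g2) {
  ArrayList<VertexPair> mapping = getMappingsVetrexUsingNodeMapping(g1, g2, 0.6, 0.75);
  System.out.println(mapping.size() + " matched vertex pairs");
  for (VertexPair vp : mapping) {
    System.out.println("pair weight = " + vp.getWeight());
  }
}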