/**
 * Keeps only the cluster(s) whose semantic concepts correspond to the highest number of
 * different search keywords; all other nodes are removed from the graph.
 *
 * @param graph the graph
 * @param correspondingKeywords mapping of semantic concepts (node IDs) to search keywords
 */
public void filterClusterByInterconnectionLevel(
    WTPGraph graph, Map<String, String> correspondingKeywords) {
  Set<Set<String>> clusters = getClusters(graph);
  printClusters(clusters);
  Set<Set<String>> resultClusters = new HashSet<Set<String>>();
  // determine the biggest number of corresponding keywords of any cluster
  int maxNumberOfCorrespondingKeywords = 0;
  for (Set<String> cluster : clusters) {
    int numOfDifferentCorrespondingKeywords =
        getNumberOfCorrespondingKeywords(cluster, correspondingKeywords);
    System.out.println(
        "Number of different corresponding keywords: " + numOfDifferentCorrespondingKeywords);
    if (numOfDifferentCorrespondingKeywords > maxNumberOfCorrespondingKeywords) {
      maxNumberOfCorrespondingKeywords = numOfDifferentCorrespondingKeywords;
      resultClusters = new HashSet<Set<String>>();
      resultClusters.add(cluster);
    } else if (numOfDifferentCorrespondingKeywords == maxNumberOfCorrespondingKeywords) {
      resultClusters.add(cluster);
    }
  }
  // keep only the nodes that belong to one of the selected clusters
  Set<String> survivingNodes = new HashSet<String>();
  for (Set<String> cluster : resultClusters) {
    survivingNodes.addAll(cluster);
  }
  Set<Node> dyingNodes = new HashSet<Node>();
  for (Node n : graph.getGraph().getNodeSet()) {
    if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
  }
  graph.getGraph().getNodeSet().removeAll(dyingNodes);
}
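/*
 * Note: getNumberOfCorrespondingKeywords(...) is used above but not shown in this excerpt.
 * The following is only a minimal sketch of such a helper, assuming correspondingKeywords
 * maps node IDs to the keyword they were found for; the original implementation may differ.
 */
private int getNumberOfCorrespondingKeywords(
    Set<String> cluster, Map<String, String> correspondingKeywords) {
  // collect the distinct keywords represented by the nodes of this cluster
  Set<String> keywordsInCluster = new HashSet<String>();
  for (String nodeId : cluster) {
    String keyword = correspondingKeywords.get(nodeId);
    if (keyword != null) keywordsInCluster.add(keyword);
  }
  return keywordsInCluster.size();
}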
/**
 * Deletes all nodes of the graph that are not part of at least one path.
 *
 * @param paths all paths that interconnect nodes corresponding to different keywords
 * @param graph the graph
 */
private void filterInterconnectedNodes(List<Path> paths, WTPGraph graph) {
  // node IDs of the nodes that are part of at least one of the paths
  Set<String> interconnectedNodeIDs = new HashSet<String>();
  // collect all node IDs that are included in at least one of the paths
  for (Path p : paths) {
    System.out.println("Path: " + p.toString());
    for (Node n : p) {
      interconnectedNodeIDs.add(n.getId());
    }
  }
  System.out.println(
      "Count of Nodes before interconnectionFiltering: " + graph.getGraph().getNodeCount());
  // nodes whose ID is not contained in interconnectedNodeIDs are not interconnected
  // and should therefore be deleted
  List<Node> notInterconnectedNodes = new LinkedList<Node>();
  for (Node n : graph.getGraph().getNodeSet()) {
    if (!interconnectedNodeIDs.contains(n.getId())) notInterconnectedNodes.add(n);
  }
  // remove all nodes that are not interconnected
  graph.getGraph().getNodeSet().removeAll(notInterconnectedNodes);
  System.out.println(
      "Count of Nodes after interconnectionFiltering: " + graph.getGraph().getNodeCount());
}
/**
 * Removes all nodes that occur in fewer than the given number of paths.
 *
 * @param graph the graph
 * @param minNumOfOccurrences minimum number of paths a node must occur in to be kept
 */
private void removeNodesWithLessThanNOccurrences(WTPGraph graph, int minNumOfOccurrences) {
  if (countOfNodeOccurrencesInPaths != null) {
    LinkedList<Node> nodesToDelete = new LinkedList<Node>();
    for (Node node : graph.getGraph().getNodeSet()) {
      Integer occurrenceCount = countOfNodeOccurrencesInPaths.get(node.getId());
      if (occurrenceCount == null || occurrenceCount < minNumOfOccurrences)
        nodesToDelete.add(node);
    }
    graph.getGraph().getNodeSet().removeAll(nodesToDelete);
    // System.out.println("nodes filtered by number of including paths heuristic");
  }
}
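/*
 * Note: the field countOfNodeOccurrencesInPaths is read above but never filled in this
 * excerpt. A minimal sketch of how it could be populated from the computed paths, assuming
 * the field is declared elsewhere as Map<String, Integer> and that Path is iterable over its
 * nodes (as used in filterInterconnectedNodes); the original implementation may differ.
 */
private void countNodeOccurrencesInPaths(List<Path> paths) {
  countOfNodeOccurrencesInPaths = new HashMap<String, Integer>();
  for (Path p : paths) {
    for (Node n : p) {
      Integer count = countOfNodeOccurrencesInPaths.get(n.getId());
      countOfNodeOccurrencesInPaths.put(n.getId(), count == null ? 1 : count + 1);
    }
  }
}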
/**
 * Keeps only the biggest cluster(s) of the graph; all nodes outside of them are removed.
 *
 * @param graph the graph
 */
public void filterClusterBySize(WTPGraph graph) {
  Set<Set<String>> clusters = getClusters(graph);
  printClusters(clusters);
  Set<String> survivingNodes = new HashSet<String>();
  int maxClusterSize = 0;
  for (Set<String> cluster : clusters) {
    System.out.println("Size of Cluster: " + cluster.size());
    if (cluster.size() > maxClusterSize) {
      maxClusterSize = cluster.size();
      survivingNodes = new HashSet<String>();
      survivingNodes.addAll(cluster);
    } else if (cluster.size() == maxClusterSize) {
      survivingNodes.addAll(cluster);
    }
  }
  Set<Node> dyingNodes = new HashSet<Node>();
  for (Node n : graph.getGraph().getNodeSet()) {
    if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
  }
  graph.getGraph().getNodeSet().removeAll(dyingNodes);
}
/**
 * Keeps only the cluster(s) whose nodes occur most frequently in the given paths.
 *
 * @param graph the graph
 * @param paths the paths in which node occurrences are counted
 */
public void filterClusterByNodeOccurrencesInPaths(WTPGraph graph, List<Path> paths) {
  Set<Set<String>> clusters = getClusters(graph);
  printClusters(clusters);
  Set<String> survivingNodes = new HashSet<String>();
  int maxOccurrence = 0;
  for (Set<String> cluster : clusters) {
    int count = getNumberOfClusterOccurrencesInPaths(cluster, paths);
    System.out.println("Number of cluster occurrences in paths: " + count);
    if (count > maxOccurrence) {
      maxOccurrence = count;
      survivingNodes = new HashSet<String>();
      survivingNodes.addAll(cluster);
    } else if (count == maxOccurrence) {
      survivingNodes.addAll(cluster);
    }
  }
  Set<Node> dyingNodes = new HashSet<Node>();
  for (Node n : graph.getGraph().getNodeSet()) {
    if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
  }
  graph.getGraph().getNodeSet().removeAll(dyingNodes);
}
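/*
 * Note: getNumberOfClusterOccurrencesInPaths(...) is used above but not shown in this
 * excerpt. A minimal sketch, assuming a cluster occurrence is counted for every path node
 * whose ID belongs to the cluster; the original counting scheme may differ.
 */
private int getNumberOfClusterOccurrencesInPaths(Set<String> cluster, List<Path> paths) {
  int occurrences = 0;
  for (Path p : paths) {
    for (Node n : p) {
      if (cluster.contains(n.getId())) occurrences++;
    }
  }
  return occurrences;
}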
/**
 * Partitions the graph into clusters of connected nodes.
 *
 * @param graph the graph
 * @return a set of clusters, each given as a set of node IDs
 */
private Set<Set<String>> getClusters(WTPGraph graph) {
  visitedWhileClustering = new HashSet<String>();
  Set<Set<String>> clusters = new HashSet<Set<String>>();
  // for each node of the graph: if it is not yet part of a cluster, generate the cluster
  // starting with this node
  for (Node n : graph.getGraph().getNodeSet()) {
    String id = n.getId();
    if (!visitedWhileClustering.contains(id)) {
      clusters.add(getCluster(graph, id));
    }
  }
  return clusters;
}
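/*
 * Note: printClusters(...) is called by the filter methods above but not shown in this
 * excerpt. A minimal debug-output sketch; the original may format its output differently.
 */
private void printClusters(Set<Set<String>> clusters) {
  System.out.println("Number of clusters: " + clusters.size());
  for (Set<String> cluster : clusters) {
    System.out.println("Cluster (" + cluster.size() + " nodes): " + cluster);
  }
}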
/**
 * Creates the set of all nodes that are connected to the given node by at least one path (its
 * cluster). The method starts with one node ID and uses a breadth-first search.
 *
 * @param graph the graph
 * @param id the ID of one node of the cluster
 * @return a set of node IDs that all belong to the same cluster
 */
private Set<String> getCluster(WTPGraph graph, String id) {
  Queue<String> queue = new LinkedList<String>();
  Set<String> cluster = new HashSet<String>();
  queue.add(id);
  while (!queue.isEmpty()) {
    String temp = queue.poll();
    if (!visitedWhileClustering.contains(temp)) {
      visitedWhileClustering.add(temp);
      cluster.add(temp);
      Iterator<Node> neighbourNodesIterator =
          graph.getGraph().getNode(temp).getNeighborNodeIterator();
      while (neighbourNodesIterator.hasNext()) {
        queue.add(neighbourNodesIterator.next().getId());
      }
    }
  }
  return cluster;
}
/**
 * Generates the semantic fingerprint for the given keywords and stores the result under the
 * given job ID.
 *
 * @param keywords the search keywords
 * @param jobID the ID of the job queue entry to process
 */
private void _genSFPinQueue(List<String> keywords, UUID jobID) {
  // create timestamp
  Date now = Calendar.getInstance().getTime();
  // create job queue entry
  jobqueue.put(jobID, new SFPGenJob(SFPGenJob.PROCESSING, now));
  System.out.println("[_genSFPinQueue] size of job queue: " + jobqueue.size());
  System.out.println("Job accessible @ " + this.jobqueue.toString());

  // -- 1) search the ontology for semantic concepts matching the keywords; the map stores each
  // concept (node ID) together with the keyword it was found for
  Map<String, String> correspondingKeywords = new HashMap<String, String>();
  KeyWordSearch s = new KeyWordSearch();
  List<SearchResult> res = s.search(keywords, maxSearchResults, correspondingKeywords);
  System.out.println("Result list from keyword search: " + res);
  List<String> request = KeyWordSearch.toUriList(res);
  System.out.println("Starting BFS...");
  BreadthFirstSearch lc = new BreadthFirstSearch();
  ResultSet result = lc.getConnections(request, maxSearchDepth);
  System.out.println("...Done");

  // -- 2) create the graph
  System.out.println("Creating the initial graph...");
  WTPGraph graph = WTPGraph.createFromResultSet(result, "Semantic Fingerprint");
  System.out.println("...Done");

  // -- 3) remove specific edges
  // graph.removeEdgesByName("ject");
  // graph.removeEdgesByName("paradigm");
  // graph.removeEdgesByName("influencedBy");
  // graph.removeEdgesByName("influenced");
  // graph.removeEdgesByName("typing");
  // graph.removeEdgesByName("license");

  // -- 4) tidy graph
  System.out.print(
      "Tidying graph ("
          + graph.getNodeCount()
          + " Nodes, "
          + graph.getEdgeCount()
          + " Edges) ...");
  GraphCleaner c = new GraphCleaner(graph.getGraph(), result.requestNodes);
  LinkedList<graph.GraphCleaner.Path> paths = c.clean(maxPathLength, maxPathExtensionLength);
  System.out.println(
      " Done ("
          + graph.getNodeCount()
          + " Nodes, "
          + graph.getEdgeCount()
          + " Edges, "
          + paths.size()
          + " Paths)");

  // -- 4.2) heuristic fingerprint selection
  InterConceptConntecting heuristic = new InterConceptConntecting();

  /** Filters all nodes that have paths to other nodes which correspond to a different keyword */
  // heuristic.filterInterconntection(graph, paths, correspondingKeywords);

  /** Filters the n nodes which occur most frequently in the paths */
  heuristic.filterNMostFrequentlyOccuring(
      graph, paths, numRelevantNodesFilter, correspondingKeywords);

  /** Selects the cluster which corresponds to the most different keywords */
  heuristic.filterClusterByInterconnectionLevel(graph, correspondingKeywords);

  /** Selects the biggest cluster */
  heuristic.filterClusterBySize(graph);

  /** Selects the cluster whose nodes occur most frequently in the paths */

  // ArrayList<ArrayList<String>> graph = new ArrayList<ArrayString>();

  // convert the WTP graph to RDF
  Model rdfgraph = WTPGraph.getRDFGraph(graph);
  rdfgraph.write(System.out);

  /*
   * ObjectMapper mapper = new ObjectMapper();
   *
   * try { return
   * makeCORS(Response.status(Status.OK).entity(mapper.writeValueAsString
   * (rdfgraph.write(System.out))), ""); } catch (JsonGenerationException
   * e) { // TODO Auto-generated catch block e.printStackTrace(); } catch
   * (JsonMappingException e) { // TODO Auto-generated catch block
   * e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated
   * catch block e.printStackTrace();
   *
   * } return makeCORS(Response.status(Status.OK), "");
   */

  // serialize the RDF model into a string via a simple in-memory OutputStream
  OutputStream output =
      new OutputStream() {
        private StringBuilder string = new StringBuilder();

        @Override
        public void write(int b) throws IOException {
          this.string.append((char) b);
        }

        public String toString() {
          return this.string.toString();
        }
      };
  rdfgraph.write((OutputStream) output);

  // put the result into the sfplist
  this.sfplist.put(jobID, output.toString());

  // get the job object of the current job ID and update it
  SFPGenJob currJob = this.jobqueue.get(jobID);
  currJob.updateStatus(SFPGenJob.FINISHED);

  // update timestamp
  now = Calendar.getInstance().getTime();
  currJob.updateTimestamp(now);
  this.jobqueue.put(jobID, currJob);
}
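/*
 * Note: SFPGenJob is referenced above (PROCESSING/FINISHED constants, updateStatus,
 * updateTimestamp) but defined elsewhere. The following is only a minimal sketch of a class
 * matching the calls made above, for illustration; the actual class may hold more state.
 */
class SFPGenJob {
  public static final int PROCESSING = 0;
  public static final int FINISHED = 1;

  private int status;
  private Date timestamp;

  public SFPGenJob(int status, Date timestamp) {
    this.status = status;
    this.timestamp = timestamp;
  }

  public void updateStatus(int status) {
    this.status = status;
  }

  public void updateTimestamp(Date timestamp) {
    this.timestamp = timestamp;
  }
}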