Code Example #1
  /**
   * Keeps only the cluster(s) whose semantic concepts correspond to the highest number of
   * different search keywords and removes all other nodes from the graph.
   *
   * @param graph the graph
   * @param correspondingKeywords mapping of semantic concepts (node IDs) to search keywords
   */
  public void filterClusterByInterconnectionLevel(
      WTPGraph graph, Map<String, String> correspondingKeywords) {
    Set<Set<String>> clusters = getClusters(graph);

    printClusters(clusters);
    Set<Set<String>> resultClusters = new HashSet<Set<String>>();

    // determine the largest number of different corresponding keywords found in any one cluster
    int maxNumberOfCorrespondingKeywords = 0;
    for (Set<String> cluster : clusters) {
      int numOfDifferentCorrespondingKeywords =
          getNumberOfCorrespondingKeywords(cluster, correspondingKeywords);
      System.out.println(
          "Number of different corresponding keywords:  " + numOfDifferentCorrespondingKeywords);
      if (numOfDifferentCorrespondingKeywords > maxNumberOfCorrespondingKeywords) {
        maxNumberOfCorrespondingKeywords = numOfDifferentCorrespondingKeywords;
        resultClusters = new HashSet<Set<String>>();
        resultClusters.add(cluster);
      } else if (numOfDifferentCorrespondingKeywords == maxNumberOfCorrespondingKeywords) {
        resultClusters.add(cluster);
      }
    }

    Set<String> survivingNodes = new HashSet<String>();
    for (Set<String> cluster : resultClusters) {
      survivingNodes.addAll(cluster);
    }
    Set<Node> dyingNodes = new HashSet<Node>();
    for (Node n : graph.getGraph().getNodeSet()) {
      if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
    }
    graph.getGraph().getNodeSet().removeAll(dyingNodes);
  }
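The helper getNumberOfCorrespondingKeywords used above is not part of these examples. A minimal sketch of what it presumably does, assuming correspondingKeywords maps node IDs to search keywords as described in the Javadoc:

  // Hypothetical sketch: counts how many different search keywords the node IDs of
  // one cluster are mapped to; node IDs without a mapping are ignored.
  private int getNumberOfCorrespondingKeywords(
      Set<String> cluster, Map<String, String> correspondingKeywords) {
    Set<String> distinctKeywords = new HashSet<String>();
    for (String nodeId : cluster) {
      String keyword = correspondingKeywords.get(nodeId);
      if (keyword != null) distinctKeywords.add(keyword);
    }
    return distinctKeywords.size();
  }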
Code Example #2
  /**
   * This method deletes all nodes of the graph that are not part of at least one path.
   *
   * @param paths all paths that interconnect nodes corresponding to different keywords
   * @param graph the graph
   */
  private void filterInterconnectedNodes(List<Path> paths, WTPGraph graph) {
    // Node IDs of the nodes that are part of at least one of the paths
    Set<String> interconnectedNodeIDs = new HashSet<String>();
    // Write all node IDs included in at least one of the paths into the interconnectedNodeIDs set
    for (Path p : paths) {
      System.out.println("Path: " + p.toString());
      for (Node n : p) {
        interconnectedNodeIDs.add(n.getId());
      }
    }
    System.out.println(
        "Count of Nodes before interconnectionFiltering: " + graph.getGraph().getNodeCount());

    // Nodes whose node ID is not included in interconnectedNodeIDs; they are not interconnected
    // and should therefore be deleted
    List<Node> notInterconnectedNodes = new LinkedList<Node>();
    for (Node n : graph.getGraph().getNodeSet()) {
      if (!interconnectedNodeIDs.contains(n.getId())) notInterconnectedNodes.add(n);
    }
    // remove all of the not interconnected Nodes
    graph.getGraph().getNodeSet().removeAll(notInterconnectedNodes);

    System.out.println(
        "Count of Nodes after interconnectionFiltering: " + graph.getGraph().getNodeCount());
  }
Code Example #3
 /**
  * Removes all nodes of the graph that occur in fewer than minNumOfOccurrences of the paths.
  *
  * @param graph the graph
  * @param minNumOfOccurrences the minimum number of containing paths a node needs to be kept
  */
 private void removeNodesWithLessThanNOccurrences(WTPGraph graph, int minNumOfOccurrences) {
   if (countOfNodeOccurrencesInPaths != null) {
     LinkedList<Node> nodesToDelete = new LinkedList<Node>();
     for (Node node : graph.getGraph().getNodeSet()) {
       Integer occurrenceCount = countOfNodeOccurrencesInPaths.get(node.getId());
       if (occurrenceCount == null || occurrenceCount < minNumOfOccurrences)
         nodesToDelete.add(node);
     }
     graph.getGraph().getNodeSet().removeAll(nodesToDelete);
     // System.out.println("nodes filtered by number of including paths heuristic");
   }
 }
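The field countOfNodeOccurrencesInPaths is checked here but never filled in these examples. A hedged sketch of how it could be populated from the paths; the helper name countNodeOccurrencesInPaths is hypothetical, and counting each node at most once per path is an assumption:

  // Hypothetical sketch: for every node ID, count in how many of the paths it occurs
  // (each node is counted at most once per path).
  private Map<String, Integer> countOfNodeOccurrencesInPaths;

  private void countNodeOccurrencesInPaths(List<Path> paths) {
    countOfNodeOccurrencesInPaths = new HashMap<String, Integer>();
    for (Path p : paths) {
      Set<String> nodeIdsInPath = new HashSet<String>();
      for (Node n : p) {
        nodeIdsInPath.add(n.getId());
      }
      for (String nodeId : nodeIdsInPath) {
        Integer count = countOfNodeOccurrencesInPaths.get(nodeId);
        countOfNodeOccurrencesInPaths.put(nodeId, count == null ? 1 : count + 1);
      }
    }
  }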
Code Example #4
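  /**
   * Keeps only the largest cluster(s) of the graph and removes all other nodes.
   *
   * @param graph the graph
   */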
  public void filterClusterBySize(WTPGraph graph) {
    Set<Set<String>> clusters = getClusters(graph);
    printClusters(clusters);

    Set<String> survivingNodes = new HashSet<String>();
    int maxClusterSize = 0;
    for (Set<String> cluster : clusters) {
      System.out.println("Size of Cluster: " + cluster.size());
      if (cluster.size() > maxClusterSize) {
        maxClusterSize = cluster.size();
        survivingNodes = new HashSet<String>();
        survivingNodes.addAll(cluster);
      } else if (cluster.size() == maxClusterSize) {
        survivingNodes.addAll(cluster);
      }
    }
    Set<Node> dyingNodes = new HashSet<Node>();
    for (Node n : graph.getGraph().getNodeSet()) {
      if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
    }
    graph.getGraph().getNodeSet().removeAll(dyingNodes);
  }
Code Example #5
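  /**
   * Keeps only the cluster(s) whose nodes occur most frequently in the given paths and removes
   * all other nodes from the graph.
   *
   * @param graph the graph
   * @param paths the paths in which the node occurrences are counted
   */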
  public void filterClusterByNodeOccurrencesInPaths(WTPGraph graph, List<Path> paths) {
    Set<Set<String>> clusters = getClusters(graph);
    printClusters(clusters);

    Set<String> survivingNodes = new HashSet<String>();
    int maxOccurrence = 0;
    for (Set<String> cluster : clusters) {
      int count = getNumberOfClusterOccurrencesInPaths(cluster, paths);
      System.out.println("Number of cluster occurrences in paths: " + count);
      if (count > maxOccurrence) {
        maxOccurrence = count;
        survivingNodes = new HashSet<String>();
        survivingNodes.addAll(cluster);
      } else if (count == maxOccurrence) {
        survivingNodes.addAll(cluster);
      }
    }
    Set<Node> dyingNodes = new HashSet<Node>();
    for (Node n : graph.getGraph().getNodeSet()) {
      if (!survivingNodes.contains(n.getId())) dyingNodes.add(n);
    }
    graph.getGraph().getNodeSet().removeAll(dyingNodes);
  }
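The helper getNumberOfClusterOccurrencesInPaths is not shown either. A minimal sketch, assuming it simply counts how often the cluster's nodes appear in the given paths:

  // Hypothetical sketch: counts occurrences of the cluster's nodes over all paths.
  private int getNumberOfClusterOccurrencesInPaths(Set<String> cluster, List<Path> paths) {
    int occurrences = 0;
    for (Path p : paths) {
      for (Node n : p) {
        if (cluster.contains(n.getId())) occurrences++;
      }
    }
    return occurrences;
  }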
Code Example #6
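  /**
   * Partitions the graph into clusters of connected nodes (connected components).
   *
   * @param graph the graph
   * @return a set of clusters, each represented as a set of node IDs
   */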
  private Set<Set<String>> getClusters(WTPGraph graph) {
    visitedWhileClustering = new HashSet<String>();
    Set<Set<String>> clusters = new HashSet<Set<String>>();

    // for each node of the graph: if not yet in a cluster, try to generate the cluster starting
    // with this node
    for (Node n : graph.getGraph().getNodeSet()) {
      String id = n.getId();
      if (!visitedWhileClustering.contains(id)) {
        clusters.add(getCluster(graph, id));
      }
    }
    return clusters;
  }
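printClusters, called by several of the filter methods above, is also not included in these examples. A minimal debugging sketch:

  // Hypothetical sketch: prints the size and node IDs of every cluster for debugging.
  private void printClusters(Set<Set<String>> clusters) {
    System.out.println("Number of clusters: " + clusters.size());
    for (Set<String> cluster : clusters) {
      System.out.println("Cluster (" + cluster.size() + " nodes): " + cluster);
    }
  }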
Code Example #7
 /**
  * Creates the set of all nodes that are connected to the given node by at least one path (a
  * cluster). It starts with one node ID and uses a breadth-first search.
  *
  * @param graph the graph
  * @param id the ID of one node of the cluster
  * @return a set of node IDs, which are all in the same cluster
  */
 private Set<String> getCluster(WTPGraph graph, String id) {
   Queue<String> queue = new LinkedList<String>();
   Set<String> cluster = new HashSet<String>();
   queue.add(id);
   while (!queue.isEmpty()) {
     String temp = queue.poll();
     if (!visitedWhileClustering.contains(temp)) {
       visitedWhileClustering.add(temp);
       cluster.add(temp);
       Iterator<Node> neighbourNodesIterator =
           graph.getGraph().getNode(temp).getNeighborNodeIterator();
       while (neighbourNodesIterator.hasNext()) {
         queue.add(neighbourNodesIterator.next().getId());
       }
     }
   }
   return cluster;
 }
Code Example #8
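  /**
   * Generates the semantic fingerprint for the given keywords: searches the ontology for matching
   * concepts, runs a breadth-first search to connect them, builds and tidies the graph, applies
   * the heuristic filters, converts the result to RDF, and stores it in the sfplist under the
   * given job ID, updating the corresponding job queue entry when finished.
   *
   * @param keywords the search keywords
   * @param jobID the ID of the job queue entry to process
   */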
  private void _genSFPinQueue(List<String> keywords, UUID jobID) {
    // create timestamp
    Date now = Calendar.getInstance().getTime();
    // create jobqueue entry
    jobqueue.put(jobID, new SFPGenJob(SFPGenJob.PROCESSING, now));
    System.out.println("[_genSFPinQueue] size of job queue: " + jobqueue.size());
    System.out.println("Job accessible @ " + this.jobqueue.toString());
    // map for the semantic concepts found in the ontology and their
    // corresponding keyword, used for searching them
    Map<String, String> correspondingKeywords = new HashMap<String, String>();

    KeyWordSearch s = new KeyWordSearch();
    List<SearchResult> res = s.search(keywords, maxSearchResults, correspondingKeywords);
    System.out.println("Resultlist from KW search: " + res);
    List<String> request = KeyWordSearch.toUriList(res);
    System.out.println("Starting BFS...");
    BreadthFirstSearch lc = new BreadthFirstSearch();
    ResultSet result = lc.getConnections(request, maxSearchDepth);
    System.out.println("...Done");

    // -- 2) create the graph
    System.out.println("Creating the initial graph...");
    WTPGraph graph = WTPGraph.createFromResultSet(result, "Semantic Fingerprint");
    System.out.println("...Done");

    // -- 3) remove specific edges
    // graph.removeEdgesByName("ject");
    // graph.removeEdgesByName("paradigm");
    // graph.removeEdgesByName("influencedBy");
    // graph.removeEdgesByName("influenced");
    // graph.removeEdgesByName("typing");
    // graph.removeEdgesByName("license");

    // -- 4) tidy graph
    System.out.print(
        "Tidying graph ("
            + graph.getNodeCount()
            + " Nodes, "
            + graph.getEdgeCount()
            + " Edges) ...");
    GraphCleaner c = new GraphCleaner(graph.getGraph(), result.requestNodes);
    LinkedList<graph.GraphCleaner.Path> paths = c.clean(maxPathLength, maxPathExtensionLength);
    System.out.println(
        " Done ("
            + graph.getNodeCount()
            + " Nodes, "
            + graph.getEdgeCount()
            + " Edges, "
            + paths.size()
            + " Paths)");

    // -- 4.2) heuristic fingerprint selection
    InterConceptConntecting heuristic = new InterConceptConntecting();

    /** Filters all Nodes that have paths to other Nodes which correspond to a different keyword */
    // heuristic.filterInterconntection(graph, paths,
    // correspondingKeywords);

    /** Filters the n Nodes which occur most frequently in the paths */
    heuristic.filterNMostFrequentlyOccuring(
        graph, paths, numRelevantNodesFilter, correspondingKeywords);

    /** Selects the cluster which corresponds to the most different keywords */
    heuristic.filterClusterByInterconnectionLevel(graph, correspondingKeywords);

    /** Selects the biggest cluster */
    heuristic.filterClusterBySize(graph);

    /** Selects the cluster whose nodes occur most frequently in the paths */
    // ArrayList<ArrayList<String>> graph = new ArrayList<ArrayString>();
    // convert WTP graph to RDF
    Model rdfgraph = WTPGraph.getRDFGraph(graph);
    rdfgraph.write(System.out);
    /*
     * ObjectMapper mapper = new ObjectMapper();
     *
     *
     * try { return
     * makeCORS(Response.status(Status.OK).entity(mapper.writeValueAsString
     * (rdfgraph.write(System.out))), ""); } catch (JsonGenerationException
     * e) { // TODO Auto-generated catch block e.printStackTrace(); } catch
     * (JsonMappingException e) { // TODO Auto-generated catch block
     * e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated
     * catch block e.printStackTrace();
     *
     * } return makeCORS(Response.status(Status.OK), "");
     */

    OutputStream output =
        new OutputStream() {
          private StringBuilder string = new StringBuilder();

          @Override
          public void write(int b) throws IOException {
            this.string.append((char) b);
          }

          public String toString() {
            return this.string.toString();
          }
        };
    rdfgraph.write(output);
    // put result in sfplist
    this.sfplist.put(jobID, output.toString());

    // get the job object of current jobid and update it
    SFPGenJob currJob = this.jobqueue.get(jobID);
    currJob.updateStatus(SFPGenJob.FINISHED);
    // update timestamp
    now = Calendar.getInstance().getTime();
    currJob.updateTimestamp(now);
    this.jobqueue.put(jobID, currJob);
  }
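SFPGenJob is only referenced in this example. A hedged sketch of the class, reconstructed from the calls above (status constants, a constructor taking a status and a timestamp, updateStatus and updateTimestamp); the actual implementation may differ:

import java.util.Date;

// Hypothetical sketch of SFPGenJob, inferred from its usage in _genSFPinQueue.
public class SFPGenJob {
  public static final int PROCESSING = 0;
  public static final int FINISHED = 1;

  private int status;
  private Date timestamp;

  public SFPGenJob(int status, Date timestamp) {
    this.status = status;
    this.timestamp = timestamp;
  }

  public void updateStatus(int status) {
    this.status = status;
  }

  public void updateTimestamp(Date timestamp) {
    this.timestamp = timestamp;
  }
}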