/** * Removes all nodes with non-valid resource classes from the graph. * * @param docGraph * @param validResourceClasses */ private void preProcessGraph(DocGraph docGraph, Set<String> validResourceClasses) { log.info(String.format("Preprocessing DocGraph[%d]", docGraph.getId())); Node n; int cnt = 0; try (Transaction tx = graphDB.beginTx()) { for (Long nodeId : docGraph.getNodes()) { n = graphDB.getNodeById(nodeId); // node's class is resource class and it's not in the valid set if (n.hasProperty(Constants.NODE_SUPER_CLASS_KEY) && n.getProperty(Constants.NODE_SUPER_CLASS_KEY) .equals(Constants.NODE_SUPER_CLASS_RESOURCE_VALUE) && !validResourceClasses.contains(getNodeClass(n))) { try (Transaction innerTx = graphDB.beginTx()) { log.info("Deleting " + n); for (Relationship e : n.getRelationships()) { e.delete(); } n.delete(); innerTx.success(); cnt++; } } } tx.success(); } log.info( String.format("Preprocessing removed %d nodes from DocGraph[%d]", cnt, docGraph.getId())); }
@Override public Map<String, Set<Long>> extract( List<DocGraph> docGraphs, DocGraphFilterFunction filterFunc, Set<String> resourceClasses, Set<String> documentClasses) { // pattern-document graph associations R_D Map<String, Set<Long>> associations = new HashMap<String, Set<Long>>(); // init path finder initFinder(); // G_D(filtered) <- G_D.where({G_d|filter(G_D)}) Set<DocGraph> filteredDocGraphs = new HashSet<DocGraph>(); if (filterFunc == null) { filteredDocGraphs.addAll(docGraphs); } else { for (DocGraph docGraph : docGraphs) { if (filterFunc.filter(docGraph)) { filteredDocGraphs.add(docGraph); } } } Set<Node> relevantResources = null; Set<Node> relevantDocuments = null; String r = null; Node v_p = null; boolean validPath = true; int pCnt = 0; log.info(String.format("Starting analysis of %d document graphs", filteredDocGraphs.size())); for (DocGraph docGraph : filteredDocGraphs) { log.info( String.format( "Analyzing DocGraph[%d] (%d, %d)", docGraph.getId(), docGraph.getNodeCount(), docGraph.getEdgeCount())); pCnt = 0; // V_R <- G_d.V.where({v|tau(mu(v)) in C_R}) relevantResources = filterNodesByClass(docGraph.getNodes(), resourceClasses); // V_R <- G_d.V.where({v|tau(mu(v)) in C_D}) relevantDocuments = filterNodesByClass(docGraph.getNodes(), documentClasses); preProcessGraph(docGraph, resourceClasses); try (Transaction tx = graphDB.beginTx()) { for (Node v_r : relevantResources) { for (Node v_d : relevantDocuments) { // P <- G_d.paths(v_R,v_D) for (Path p : finder.findAllPaths(v_r, v_d)) { validPath = true; r = String.format("%s", v_r.getProperty(Constants.ID_KEY)); v_p = v_r; for (Relationship e : p.relationships()) { if (e.getStartNode().getId() == v_p.getId()) { if (getNodeSuperClass(e.getEndNode()) .equals(Constants.NODE_SUPER_CLASS_RESOURCE_VALUE)) { validPath = false; break; } r = String.format( "%s-%s->%s", r, e.getType().name(), getNodeClass(e.getEndNode())); v_p = e.getEndNode(); } else { if (getNodeSuperClass(e.getStartNode()) .equals(Constants.NODE_SUPER_CLASS_RESOURCE_VALUE)) { validPath = false; break; } r = String.format( "%s<-%s-%s", r, e.getType().name(), getNodeClass(e.getStartNode())); v_p = e.getStartNode(); } } // R_D.add(<r, G_d>) if (validPath) { associations = addPatternDocGraphAssociation(associations, r, docGraph); pCnt++; } } } } tx.success(); } log.info(String.format("%d patterns in DocGraph[%d]", pCnt, docGraph.getId())); } return associations; }