private static void transitive_reductionInstance(GAction action, G g) throws SLIB_Ex_Critic { int invalidInstanceNb = 0; int annotNbBase = 0; int annotDeleted = 0; Set<URI> instances = GraphAccessor.getInstances(g); logger.info("Cleaning " + RDF.TYPE + " triplets of " + g.getURI()); System.out.println(g); RVF_TAX rvf = new RVF_TAX(g, Direction.IN); // Retrieve descendants for all vertices Map<URI, Set<URI>> descs = rvf.getAllRVClass(); for (URI instance : instances) { Set<URI> redundants = new HashSet<URI>(); Set<URI> classes = g.getV(instance, RDF.TYPE, Direction.OUT); annotNbBase += classes.size(); Iterator<URI> it = classes.iterator(); while (it.hasNext()) { URI c = it.next(); Set<URI> descC = descs.get(c); for (URI c2 : classes) { if (c != c2 && descC.contains(c2)) { redundants.add(c); it.remove(); break; } } } if (!redundants.isEmpty()) { for (URI r : redundants) { g.removeE(new Edge(instance, RDF.TYPE, r)); } invalidInstanceNb++; annotDeleted += redundants.size(); } } double invalidInstanceP = 0; if (instances.size() > 0) { invalidInstanceP = invalidInstanceNb * 100 / instances.size(); } double annotDelP = 0; if (annotNbBase > 0) { annotDelP = annotDeleted * 100 / annotNbBase; } logger.info( "Number of instance containing abnormal annotation: " + invalidInstanceNb + "/" + instances.size() + " i.e. (" + invalidInstanceP + "%)"); logger.info( "Number of annotations: " + annotNbBase + ", deleted: " + annotDeleted + " (" + (annotDelP) + "%), current annotation number " + (annotNbBase - annotDeleted)); }
/** * Reduction of the set of vertices composing the graph. * * <p>------------------------------------------------------------------- IMPORTANT: If modified, * this documentation must also be modified in the class GActionType. * ------------------------------------------------------------------- * * <p>Accepted parameters are: * * <ul> * <li>regex: specify a REGEX in Java syntax which will be used to test if the value associated * to a vertex makes it eligible to be removed. If the value match the REGEX, the vertex * will be removed * <li>vocabulary: Remove all the vertices associated to the vocabularies specified. Accepted * vocabularies flag are RDF, RDFS, OWL. Several vocabularies can be specified using comma * separator. * <li>file_uris: specify a list of files containing URIs corresponding to the vertices to * remove. Multiple files can be specified using comma separator. * </ul> * * @param factory the factory to consider if element requires to be generated (e.g. {@link URI}) * @param action the action to perform * @param g the graph on which the action must be performed * @throws SLIB_Ex_Critic */ private static void verticeReduction(URIFactory factory, GAction action, G g) throws SLIB_Ex_Critic { logger.info("-------------------------------------"); logger.info(" Vertices Reduction"); logger.info("-------------------------------------"); logger.info("Starting " + GActionType.VERTICES_REDUCTION); String regex = (String) action.getParameter("regex"); String vocVal = (String) action.getParameter("vocabulary"); String file_uris = (String) action.getParameter("file_uris"); String rootURIs = (String) action.getParameter("root_uri"); Set<URI> classes = GraphAccessor.getClasses(g); Set<URI> instances = GraphAccessor.getInstances(g); logger.info("Classes : " + classes.size()); logger.info("instances: " + instances.size()); logger.info("vertices : " + g.getV().size()); Set<URI> toRemove = new HashSet<URI>(); if (rootURIs != null) { /* * Reduce the Graph considering all classes subsumed by the given root vertex * Instances annotated by those classes are also conserved into the graph, others are removed. */ logger.info( "Applying reduction of the part of the graph " + g.getURI() + " which is not contained in the graph induced by " + rootURIs + " (only the classes subsumed by the given root are considered)"); try { URI rootURI = factory.getURI(rootURIs); if (!g.containsVertex(rootURI)) { throw new SLIB_Ex_Critic( "Error cannot state vertex associated to URI " + rootURI + " in graph " + g.getURI()); } DescendantEngine descEngine = new DescendantEngine(g); Set<URI> descsInclusive = descEngine.getDescendantsInc(rootURI); logger.info(descsInclusive.size() + " subclasses of " + rootURI + " detected"); int classesNb = classes.size(); Set<URI> classesToRemove = classes; classesToRemove.removeAll(descsInclusive); logger.info( "Removing " + classesToRemove.size() + "/" + classesNb + " classes of the graph"); g.removeV(classesToRemove); // We then remove the entities which are not // linked to the graph current underlying taxonomic graph Set<URI> instancesToRemove = new HashSet<URI>(); for (URI v : instances) { // No links to taxonomic graph anymore // we check the URI as is not considered as both instance and class if (!descsInclusive.contains(v) && g.getV(v, RDF.TYPE, Direction.OUT).isEmpty()) { instancesToRemove.add(v); } } logger.info("Removing " + instancesToRemove.size() + " instances of the graph"); g.removeV(instancesToRemove); } catch (IllegalArgumentException e) { throw new SLIB_Ex_Critic( "Error value specified for parameter root_uri, i.e. " + rootURIs + " cannot be converted into an URI"); } } else if (regex != null) { logger.info("Applying regex: " + regex); Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new SLIB_Ex_Critic( "The specified regex '" + regex + "' is invalid: " + e.getMessage()); } Matcher matcher; for (URI v : g.getV()) { matcher = pattern.matcher(v.stringValue()); if (matcher.find()) { toRemove.add(v); logger.debug("regex matches: " + v); } } logger.info("Vertices to remove: " + toRemove.size() + "/" + g.getV().size()); g.removeV(toRemove); logger.debug("ending " + GActionType.VERTICES_REDUCTION); } else if (vocVal != null) { String[] vocs = vocVal.split(","); for (String voc : vocs) { if (voc.trim().equals("RDF")) { logger.info("Removing RDF vocabulary"); removeVocURIs(factory, getRDFVocURIs(), g); } else if (voc.trim().equals("RDFS")) { logger.info("Removing RDFS vocabulary"); removeVocURIs(factory, getRDFSVocURIs(), g); } else if (voc.trim().equals("OWL")) { logger.info("Removing OWL vocabulary"); removeVocURIs(factory, getOWLVocURIs(), g); } } } else if (file_uris != null) { String[] files = file_uris.split(","); for (String f : files) { logger.info("Removing Uris specified in " + f); try { FileInputStream fstream = new FileInputStream(f.trim()); DataInputStream in = new DataInputStream(fstream); BufferedReader br = new BufferedReader(new InputStreamReader(in)); String line; while ((line = br.readLine()) != null) { line = line.trim(); g.removeV(factory.getURI(line)); } in.close(); } catch (IOException e) { throw new SLIB_Ex_Critic(e.getMessage()); } } } logger.info("vertices reduction performed"); logger.info("-------------------------------------"); }