Esempio n. 1
0
  /**
   * Removes redundant {@code rdf:type} statements attached to the instances of the graph.
   *
   * <p>For every instance, the set of classes it is typed by is inspected. A class {@code c} of
   * that set is considered redundant when another class of the same set belongs to the set that
   * the {@link RVF_TAX} engine (built with {@link Direction#IN}) associates to {@code c}, i.e.
   * its descendants according to the comment of the original implementation. The corresponding
   * {@code (instance, rdf:type, c)} edge is then removed from the graph.
   *
   * <p>Statistics about the number of instances and annotations affected are logged at info
   * level once the reduction is done.
   *
   * @param action the action being performed (not read by this method)
   * @param g the graph to clean
   * @throws SLIB_Ex_Critic declared for the graph operations invoked by this method
   */
  private static void transitive_reductionInstance(GAction action, G g) throws SLIB_Ex_Critic {

    int invalidInstanceNb = 0;
    int annotNbBase = 0;
    int annotDeleted = 0;

    Set<URI> instances = GraphAccessor.getInstances(g);

    logger.info("Cleaning " + RDF.TYPE + " triplets of " + g.getURI());
    // Graph dump kept for troubleshooting, but routed through the logger instead of stdout
    logger.debug("" + g);

    RVF_TAX rvf = new RVF_TAX(g, Direction.IN);

    // Retrieve descendants for all vertices
    Map<URI, Set<URI>> descs = rvf.getAllRVClass();

    for (URI instance : instances) {

      Set<URI> redundants = new HashSet<URI>();
      Set<URI> classes = g.getV(instance, RDF.TYPE, Direction.OUT);

      annotNbBase += classes.size();

      Iterator<URI> it = classes.iterator();
      while (it.hasNext()) {

        URI c = it.next();
        Set<URI> descC = descs.get(c);

        // A class without any entry in the map cannot subsume another annotation
        if (descC == null) {
          continue;
        }

        for (URI c2 : classes) {

          if (!c.equals(c2) && descC.contains(c2)) {
            redundants.add(c);
            // c is dropped from the working set so that the classes processed afterwards
            // are not compared against it; the inner iteration stops right after (break),
            // hence the removal through the outer iterator is safe.
            it.remove();
            break;
          }
        }
      }

      if (!redundants.isEmpty()) {

        for (URI r : redundants) {
          g.removeE(new Edge(instance, RDF.TYPE, r));
        }
        invalidInstanceNb++;
        annotDeleted += redundants.size();
      }
    }

    // 100.0 forces a floating point division: an int division would truncate the percentage
    double invalidInstanceP = 0;
    if (instances.size() > 0) {
      invalidInstanceP = invalidInstanceNb * 100.0 / instances.size();
    }

    double annotDelP = 0;
    if (annotNbBase > 0) {
      annotDelP = annotDeleted * 100.0 / annotNbBase;
    }

    logger.info(
        "Number of instance containing abnormal annotation: "
            + invalidInstanceNb
            + "/"
            + instances.size()
            + "  i.e. ("
            + invalidInstanceP
            + "%)");
    logger.info(
        "Number of annotations: "
            + annotNbBase
            + ", deleted: "
            + annotDeleted
            + " ("
            + (annotDelP)
            + "%), current annotation number "
            + (annotNbBase - annotDeleted));
  }
Esempio n. 2
0
  /**
   * Reduction of the set of vertices composing the graph.
   *
   * <p>------------------------------------------------------------------- IMPORTANT: If modified,
   * this documentation must also be modified in the class GActionType.
   * -------------------------------------------------------------------
   *
   * <p>Accepted parameters are listed below. Only one of them is applied per call: they are tested
   * in the order root_uri, regex, vocabulary, file_uris and the first one which is specified is
   * the one used.
   *
   * <ul>
   *   <li>root_uri: specify the URI of a vertex of the graph. The classes which are not part of
   *       the descendants of that vertex (the vertex itself being kept) are removed, then the
   *       instances which have no outgoing rdf:type link left are removed too.
   *   <li>regex: specify a REGEX in Java syntax which will be used to test if the value associated
   *       to a vertex makes it eligible to be removed. If the REGEX is found in the value, the
   *       vertex will be removed
   *   <li>vocabulary: Remove all the vertices associated to the vocabularies specified. Accepted
   *       vocabularies flag are RDF, RDFS, OWL. Several vocabularies can be specified using comma
   *       separator. Other flags are skipped.
   *   <li>file_uris: specify a list of files containing URIs (one per line) corresponding to the
   *       vertices to remove. Multiple files can be specified using comma separator. Blank lines
   *       are skipped.
   * </ul>
   *
   * @param factory the factory to consider if element requires to be generated (e.g. {@link URI})
   * @param action the action to perform
   * @param g the graph on which the action must be performed
   * @throws SLIB_Ex_Critic if the root URI cannot be built or is not a vertex of the graph, if the
   *     regex is invalid or if a file of URIs cannot be read
   */
  private static void verticeReduction(URIFactory factory, GAction action, G g)
      throws SLIB_Ex_Critic {

    logger.info("-------------------------------------");
    logger.info(" Vertices Reduction");
    logger.info("-------------------------------------");
    logger.info("Starting " + GActionType.VERTICES_REDUCTION);

    String regex = (String) action.getParameter("regex");
    String vocVal = (String) action.getParameter("vocabulary");
    String file_uris = (String) action.getParameter("file_uris");
    String rootURIs = (String) action.getParameter("root_uri");

    Set<URI> classes = GraphAccessor.getClasses(g);
    Set<URI> instances = GraphAccessor.getInstances(g);

    logger.info("Classes  : " + classes.size());
    logger.info("instances: " + instances.size());
    logger.info("vertices : " + g.getV().size());

    Set<URI> toRemove = new HashSet<URI>();

    if (rootURIs != null) {

      /*
       * Reduce the Graph considering all classes subsumed by the given root vertex
       * Instances annotated by those classes are also conserved into the graph, others are removed.
       */
      logger.info(
          "Applying reduction of the part of the graph "
              + g.getURI()
              + " which is not contained in the graph induced by "
              + rootURIs
              + " (only the classes subsumed by the given root are considered)");

      try {
        URI rootURI = factory.getURI(rootURIs);

        if (!g.containsVertex(rootURI)) {
          throw new SLIB_Ex_Critic(
              "Error cannot state vertex associated to URI " + rootURI + " in graph " + g.getURI());
        }

        DescendantEngine descEngine = new DescendantEngine(g);
        Set<URI> descsInclusive = descEngine.getDescendantsInc(rootURI);

        logger.info(descsInclusive.size() + " subclasses of " + rootURI + " detected");

        int classesNb = classes.size();

        // Work on a copy: the set handed over by GraphAccessor must not be altered here, and
        // the collection given to removeV must not be one the graph could modify while
        // processing it (NOTE(review): whether getClasses returns a live view is not visible
        // from this method, the copy is safe in both cases).
        Set<URI> classesToRemove = new HashSet<URI>(classes);
        classesToRemove.removeAll(descsInclusive);

        logger.info(
            "Removing " + classesToRemove.size() + "/" + classesNb + " classes of the graph");

        g.removeV(classesToRemove);

        // We then remove the entities which are not
        // linked to the graph current underlying taxonomic graph
        Set<URI> instancesToRemove = new HashSet<URI>();

        for (URI v : instances) {

          // No links to taxonomic graph anymore
          // we check the URI as is not considered as both instance and class
          if (!descsInclusive.contains(v) && g.getV(v, RDF.TYPE, Direction.OUT).isEmpty()) {
            instancesToRemove.add(v);
          }
        }

        logger.info("Removing " + instancesToRemove.size() + " instances of the graph");
        g.removeV(instancesToRemove);

      } catch (IllegalArgumentException e) {
        throw new SLIB_Ex_Critic(
            "Error value specified for parameter root_uri, i.e. "
                + rootURIs
                + " cannot be converted into an URI");
      }
    } else if (regex != null) {

      logger.info("Applying regex: " + regex);
      Pattern pattern;

      try {
        pattern = Pattern.compile(regex);
      } catch (PatternSyntaxException e) {
        throw new SLIB_Ex_Critic(
            "The specified regex '" + regex + "' is invalid: " + e.getMessage());
      }

      Matcher matcher;

      // Vertices are only collected during the iteration, the removal is done afterwards
      for (URI v : g.getV()) {
        matcher = pattern.matcher(v.stringValue());

        // find() is used: a partial match of the value is enough to select the vertex
        if (matcher.find()) {
          toRemove.add(v);
          logger.debug("regex matches: " + v);
        }
      }

      logger.info("Vertices to remove: " + toRemove.size() + "/" + g.getV().size());

      g.removeV(toRemove);

      logger.debug("ending " + GActionType.VERTICES_REDUCTION);
    } else if (vocVal != null) {

      String[] vocs = vocVal.split(",");

      for (String voc : vocs) {

        String flag = voc.trim();

        if (flag.equals("RDF")) {
          logger.info("Removing RDF vocabulary");
          removeVocURIs(factory, getRDFVocURIs(), g);
        } else if (flag.equals("RDFS")) {
          logger.info("Removing RDFS vocabulary");
          removeVocURIs(factory, getRDFSVocURIs(), g);
        } else if (flag.equals("OWL")) {
          logger.info("Removing OWL vocabulary");
          removeVocURIs(factory, getOWLVocURIs(), g);
        } else {
          // Make the skipped flag visible instead of ignoring it silently
          logger.info("Skipping unknown vocabulary flag '" + flag + "'");
        }
      }
    } else if (file_uris != null) {

      String[] files = file_uris.split(",");

      for (String f : files) {

        logger.info("Removing Uris specified in " + f);

        FileInputStream fstream = null;

        try {

          fstream = new FileInputStream(f.trim());
          BufferedReader br = new BufferedReader(new InputStreamReader(fstream));

          String line;

          while ((line = br.readLine()) != null) {

            line = line.trim();

            // Blank lines do not carry any URI
            if (line.isEmpty()) {
              continue;
            }

            g.removeV(factory.getURI(line));
          }
        } catch (IOException e) {
          throw new SLIB_Ex_Critic(e.getMessage());
        } finally {
          // The file is released whatever happened while reading it or updating the graph
          if (fstream != null) {
            try {
              fstream.close();
            } catch (IOException ignored) {
              // the stream was only read, a failure on close has no consequence on the result
            }
          }
        }
      }
    }

    logger.info("vertices reduction performed");
    logger.info("-------------------------------------");
  }