/**
   * Builds the String representation of an RDFNode for an Elastic Search document.
   *
   * @param node an RDFNode representing the value of a property for a given resource
   * @param getNodeLabel if true, a resource node is rendered as the value returned by {@code
   *     getLabelForUri} for its URI instead of the URI itself
   * @return If the node is a literal whose datatype maps to {@code Boolean} or to a subclass of
   *     {@code Number}, the literal value converted to String, without quotes
   *     <p>If the node is any other literal (including one with no datatype or with a datatype
   *     that has no Java class), its lexical form passed through {@code
   *     EEASettings.parseForJson}, surrounded by double quotes
   *     <p>If the node is a resource, its URI (or its label when {@code getNodeLabel} is true),
   *     surrounded by double quotes
   *     <p>Otherwise, the empty string
   */
  private String getStringForResult(RDFNode node, boolean getNodeLabel) {
    if (node.isLiteral()) {
      // A literal may have no datatype, and a datatype may have no Java class; both cases are
      // handled with explicit checks instead of catching NullPointerException.
      Class<?> literalJavaClass = null;
      if (node.asLiteral().getDatatype() != null) {
        literalJavaClass = node.asLiteral().getDatatype().getJavaClass();
      }

      boolean isBooleanOrNumber =
          literalJavaClass != null
              && (Boolean.class.equals(literalJavaClass)
                  || Number.class.isAssignableFrom(literalJavaClass));

      if (isBooleanOrNumber) {
        // Booleans and numbers are emitted as bare JSON values
        return String.valueOf(node.asLiteral().getValue());
      }
      return "\"" + EEASettings.parseForJson(node.asLiteral().getLexicalForm()) + "\"";
    }

    if (node.isResource()) {
      String uri = node.asResource().getURI();
      String text = getNodeLabel ? getLabelForUri(uri) : uri;
      return "\"" + text + "\"";
    }

    return "";
  }
  /**
   * Query SPARQL endpoint with a SELECT query and collect the returned rows into a model.
   *
   * <p>Every solution is expected to bind {@code ?s} and {@code ?p} to resources and {@code ?o} to
   * any node. Solutions that do not bind all three variables are logged and skipped. Literal
   * objects keep their lexical form and datatype; any other object is stored as a plain literal
   * holding the node's string form.
   *
   * @param qExec QueryExecution encapsulating the query
   * @return model retrieved by querying the endpoint
   */
  private Model getSelectModel(QueryExecution qExec) {
    Model model = ModelFactory.createDefaultModel();
    Graph graph = model.getGraph();
    ResultSet results = qExec.execSelect();

    while (results.hasNext()) {
      QuerySolution sol = results.next();
      RDFNode subjectNode;
      RDFNode predicateNode;
      RDFNode object;

      try {
        subjectNode = sol.getResource("s");
        predicateNode = sol.getResource("p");
        object = sol.get("o");
      } catch (NoSuchElementException e) {
        logger.error("SELECT query does not return a (?s ?p ?o) Triple");
        continue;
      }

      // An unbound variable is reported as null rather than as an exception, so it has to be
      // checked explicitly before the nodes are dereferenced.
      if (subjectNode == null || predicateNode == null || object == null) {
        logger.error("SELECT query does not return a (?s ?p ?o) Triple");
        continue;
      }

      String subject = subjectNode.toString();
      String predicate = predicateNode.toString();

      Node objNode;
      if (object.isLiteral()) {
        Literal obj = object.asLiteral();
        objNode = NodeFactory.createLiteral(obj.getString(), obj.getDatatype());
      } else {
        objNode = NodeFactory.createLiteral(object.toString());
      }

      graph.add(
          new Triple(NodeFactory.createURI(subject), NodeFactory.createURI(predicate), objNode));
    }

    return model;
  }
// Example #3
// 0
  /**
   * Converts a fixed GlycoCT sequence with {@link GlycoSequenceToWurcsSelectSparql}, runs the
   * generated SELECT query against the remote test endpoint and prints the {@code PrimaryId} of
   * every returned row.
   *
   * <p>Each row is collected into its own {@link SparqlEntity}; the entity holding the input
   * sequence is left untouched.
   */
  @Test
  public void testSelectToWurcsSparql() throws SparqlException, UnsupportedEncodingException {
    GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct");
    SparqlEntity se = new SparqlEntity();
    se.setValue(
        GlycoSequenceToWurcsSelectSparql.FromSequence,
        "RES\n1b:a-dgal-HEX-1:5\n2s:n-acetyl\n3b:b-dgal-HEX-1:5\n4b:b-dglc-HEX-1:5\n5s:n-acetyl\n6b:b-dgal-HEX-1:5\n7b:a-lgal-HEX-1:5|6:d\n8b:b-dglc-HEX-1:5\n9s:n-acetyl\n10b:b-dglc-HEX-1:5\n11s:n-acetyl\n12b:b-dgal-HEX-1:5\n13b:a-lgal-HEX-1:5|6:d\nLIN\n1:1d(2+1)2n\n2:1o(3+1)3d\n3:3o(3+1)4d\n4:4d(2+1)5n\n5:4o(4+1)6d\n6:6o(2+1)7d\n7:3o(6+1)8d\n8:8d(2+1)9n\n9:1o(6+1)10d\n10:10d(2+1)11n\n11:10o(4+1)12d\n12:12o(2+1)13d"
            .replaceAll("\n", "\\\\n"));
    s.setSparqlEntity(se);
    logger.debug(s.getSparql());
    Query query =
        QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", ""));
    // A local endpoint such as http://localhost:3030/glycobase/query can be used instead.
    QueryExecution qe =
        QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query);

    List<SparqlEntity> results = new ArrayList<SparqlEntity>();

    try {
      ResultSet rs = qe.execSelect();

      while (rs.hasNext()) {
        QuerySolution row = rs.next();
        Iterator<String> columns = row.varNames();
        // One entity per result row; writing into `se` would overwrite the input entity and
        // make every element of `results` the same object.
        SparqlEntity se2 = new SparqlEntity();
        while (columns.hasNext()) {
          String column = columns.next();
          RDFNode cell = row.get(column);

          if (cell.isResource()) {
            Resource resource = cell.asResource();
            // do something maybe with the OntModel???
            if (resource.isLiteral()) se2.setValue(column, resource.asLiteral().getString());
            else se2.setValue(column, resource.toString());
          } else if (cell.isLiteral()) {
            se2.setValue(column, cell.asLiteral().getString());
          } else if (cell.isAnon()) {
            se2.setValue(column, "anon");
          } else {
            se2.setValue(column, cell.toString());
          }
        }
        results.add(se2);
      }
    } finally {
      // Release the connection to the endpoint even when reading the results fails
      qe.close();
    }

    for (SparqlEntity entity : results) {
      System.out.println("results: " + entity.getValue("PrimaryId"));
    }
  }
// Example #4
// 0
 /**
  * Looks up the source column numbers declared for a resource in the model.
  *
  * <p>A {@code Vertere.source_column} statement takes precedence and yields a single-element
  * array holding its int value. Otherwise a {@code Vertere.source_columns} statement is read as
  * an RDF list whose members are int literals. When neither property is present an empty array
  * is returned.
  *
  * @param resource the resource whose column declaration is looked up
  * @return the declared column numbers, in list order; never null
  */
 public int[] getSourceColumns(Resource resource) {
   boolean hasSingleColumn = _model.contains(resource, Vertere.source_column);
   if (hasSingleColumn) {
     int column = _model.getProperty(resource, Vertere.source_column).getInt();
     return new int[] {column};
   }

   if (!_model.contains(resource, Vertere.source_columns)) {
     return new int[0];
   }

   Resource listHead = _model.getProperty(resource, Vertere.source_columns).getResource();
   List<RDFNode> members = listHead.as(RDFList.class).asJavaList();

   int[] columns = new int[members.size()];
   int index = 0;
   for (RDFNode member : members) {
     columns[index] = member.asLiteral().getInt();
     index++;
   }
   return columns;
 }
  /**
   * Builds an {@link EntityDefinition} from the entity map description rooted at {@code root}.
   *
   * <p>The root must have exactly one match for the {@code :entityField}, {@code :map} and {@code
   * :defaultField} properties; {@code :graphField}, {@code :langField} and {@code :uidField} are
   * optional. The value of {@code TextVocab.pMap} is walked as an RDF list whose entries each
   * carry a literal field name, a URI predicate and, optionally, an analyzer resource that is
   * opened through the assembler.
   *
   * @param a assembler used to open analyzer resources
   * @param root resource describing the entity map
   * @param mode assembler mode (not used by this method)
   * @return the entity definition described by {@code root}
   * @throws TextIndexException if no or several entity maps match, if the map list is malformed,
   *     or if the default field has no predicate definition
   */
  @Override
  public EntityDefinition open(Assembler a, Resource root, Mode mode) {
    String prologue = "PREFIX : <" + NS + ">   PREFIX list: <http://jena.apache.org/ARQ/list#> ";
    Model model = root.getModel();

    String qs1 =
        StrUtils.strjoinNL(
            prologue,
            "SELECT * {",
            "  ?eMap  :entityField  ?entityField ;",
            "         :map ?map ;",
            "         :defaultField ?dftField .",
            "  OPTIONAL {",
            "    ?eMap :graphField ?graphField",
            "  }",
            "  OPTIONAL {",
            "    ?eMap :langField ?langField",
            "  }",
            "  OPTIONAL {",
            "    ?eMap :uidField ?uidField",
            "  }",
            "}");
    ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1);
    pss.setIri("eMap", root.getURI());

    Query query1 = QueryFactory.create(pss.toString());
    QueryExecution qexec1 = QueryExecutionFactory.create(query1, model);
    List<QuerySolution> results;
    try {
      // toList copies the solutions, so the execution can be closed right afterwards
      results = ResultSetFormatter.toList(qexec1.execSelect());
    } finally {
      qexec1.close();
    }

    if (results.isEmpty()) {
      Log.warn(this, "Failed to find a valid EntityMap for : " + root);
      throw new TextIndexException("Failed to find a valid EntityMap for : " + root);
    }

    if (results.size() != 1) {
      Log.warn(this, "Multiple matches for EntityMap for : " + root);
      throw new TextIndexException("Multiple matches for EntityMap for : " + root);
    }

    QuerySolution qsol1 = results.get(0);
    String entityField = qsol1.getLiteral("entityField").getLexicalForm();
    String graphField =
        qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null;
    String langField =
        qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null;
    String defaultField =
        qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null;
    String uniqueIdField =
        qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null;

    Multimap<String, Node> mapDefs = HashMultimap.create();
    Map<String, Analyzer> analyzerDefs = new HashMap<>();

    // Walk the rdf:first / rdf:rest chain by hand so that each malformed entry can be reported
    // with a specific message.
    Statement listStmt = root.getProperty(TextVocab.pMap);
    while (listStmt != null) {
      RDFNode n = listStmt.getObject();
      if (!n.isResource()) {
        throw new TextIndexException("Text list node is not a resource : " + n);
      }
      Resource listResource = n.asResource();
      if (listResource.equals(RDF.nil)) {
        break; // end of the list
      }

      Statement listEntryStmt = listResource.getProperty(RDF.first);
      if (listEntryStmt == null) {
        throw new TextIndexException("Text map list is not well formed.  No rdf:first property");
      }
      n = listEntryStmt.getObject();
      if (!n.isResource()) {
        throw new TextIndexException("Text map list entry is not a resource : " + n);
      }
      Resource listEntry = n.asResource();

      Statement fieldStatement = listEntry.getProperty(TextVocab.pField);
      if (fieldStatement == null) {
        throw new TextIndexException("Text map entry has no field property");
      }
      n = fieldStatement.getObject();
      if (!n.isLiteral()) {
        throw new TextIndexException("Text map entry field property has no literal value : " + n);
      }
      String field = n.asLiteral().getLexicalForm();

      Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate);
      if (predicateStatement == null) {
        throw new TextIndexException("Text map entry has no predicate property");
      }
      n = predicateStatement.getObject();
      if (!n.isURIResource()) {
        throw new TextIndexException(
            "Text map entry predicate property has non resource value : " + n);
      }
      mapDefs.put(field, n.asNode());

      Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer);
      if (analyzerStatement != null) {
        n = analyzerStatement.getObject();
        if (!n.isResource()) {
          throw new TextIndexException("Text map entry analyzer property is not a resource : " + n);
        }
        Resource analyzerResource = n.asResource();
        Analyzer analyzer = (Analyzer) a.open(analyzerResource);
        analyzerDefs.put(field, analyzer);
      }

      // move on to the next element in the list
      listStmt = listResource.getProperty(RDF.rest);
    }

    // Primary field/predicate
    if (defaultField != null) {
      Collection<Node> c = mapDefs.get(defaultField);
      if (c.isEmpty()) {
        throw new TextIndexException("No definition of primary field '" + defaultField + "'");
      }
    }

    EntityDefinition docDef = new EntityDefinition(entityField, defaultField);
    docDef.setGraphField(graphField);
    docDef.setLangField(langField);
    docDef.setUidField(uniqueIdField);
    // keySet() yields each field once; keys() would repeat a field once per mapped predicate
    // and register every (field, predicate) pair several times.
    for (String f : mapDefs.keySet()) {
      for (Node p : mapDefs.get(f)) {
        docDef.set(f, p);
      }
    }
    for (Map.Entry<String, Analyzer> analyzerDef : analyzerDefs.entrySet()) {
      docDef.setAnalyzer(analyzerDef.getKey(), analyzerDef.getValue());
    }
    return docDef;
  }
// Example #6
// 0
  /**
   * For every stored {@code Translation} that has a GlycoCT string and a structure with a
   * positive id, runs the GlycoCT-to-WURCS SELECT query against the remote test endpoint and
   * prints the distinct {@code structure id <TAB> PrimaryId} pairs that were found.
   *
   * <p>Each result row is collected into its own {@link SparqlEntity}; the entity holding the
   * input sequence is left untouched.
   */
  @Test
  public void testKBtoWurcsSparqlTranslation() throws SparqlException {

    List<Translation> translations = Ebean.find(Translation.class).findList();
    HashSet<String> resultList = new HashSet<>();

    for (Translation translation : translations) {
      System.out.println("id check " + translation.id + " ct " + translation.ct);
      if (translation.ct == null) continue;

      if (translation.structure.id > 0) {

        GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct");
        SparqlEntity se = new SparqlEntity();
        String ct = StringUtils.chomp(translation.ct);
        System.out.println("ct on top: " + ct);
        if (ct != null) {
          se.setValue(
              GlycoSequenceToWurcsSelectSparql.FromSequence,
              ct.replaceAll("\n", "\\\\n")
                  .replaceAll("x\\(", "u\\(")
                  .replaceAll("\\)x", "\\)u")
                  .trim());
          s.setSparqlEntity(se);
          logger.debug(s.getSparql());

          // Build the cleaned query text once and reuse it for parsing and for the trace output
          String queryText = s.getSparql().replaceAll("null", "").replace("?Sequence", "");
          Query query = QueryFactory.create(queryText);
          System.out.println("Id " + translation.structure.id + " Query: " + queryText);
          QueryExecution qe =
              QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query);

          List<SparqlEntity> results = new ArrayList<>();

          try {
            ResultSet rs = qe.execSelect();

            while (rs.hasNext()) {
              QuerySolution row = rs.next();
              Iterator<String> columns = row.varNames();
              // One entity per result row; writing into `se` would overwrite the input entity
              // and make every element of `results` the same object.
              SparqlEntity se2 = new SparqlEntity();
              while (columns.hasNext()) {
                String column = columns.next();
                RDFNode cell = row.get(column);

                if (cell.isResource()) {
                  Resource resource = cell.asResource();
                  // do something maybe with the OntModel???
                  if (resource.isLiteral()) se2.setValue(column, resource.asLiteral().getString());
                  else se2.setValue(column, resource.toString());
                } else if (cell.isLiteral()) {
                  se2.setValue(column, cell.asLiteral().getString());
                } else if (cell.isAnon()) {
                  se2.setValue(column, "anon");
                } else {
                  se2.setValue(column, cell.toString());
                }
              }
              results.add(se2);
            }
          } finally {
            // Release the connection to the endpoint even when reading the results fails
            qe.close();
          }

          for (SparqlEntity entity : results) {
            resultList.add(
                translation.structure.id + "\t" + entity.getValue("PrimaryId").toString());
          }
        }
      }
    }

    for (String c : resultList) {
      System.out.println(c);
    }
  }
// Example #7
// 0
  /**
   * For every stored {@code Structure} with id 7400 or above, normalizes its glycan string,
   * converts it to condensed GlycoCT, runs the GlycoCT-to-WURCS SELECT query against the remote
   * test endpoint and records the distinct {@code structure id <TAB> PrimaryId} pairs.
   *
   * <p>The pairs are printed to standard output and written, UTF-8 encoded, to {@code
   * /tmp/HashSet.txt}. Each result row is collected into its own {@link SparqlEntity}; the entity
   * holding the input sequence is left untouched.
   */
  @Test
  public void testKBtoWurcsSparql() throws SparqlException {

    List<Structure> structures = Ebean.find(Structure.class).findList();
    HashSet<String> resultList = new HashSet<>();

    for (Structure structure : structures) {
      if (structure.id >= 7400) {

        // Repair known spelling variants of the free-end prefix before parsing
        if (structure.glycanst.startsWith("v--")) {
          structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--");
        }

        if (structure.glycanst.startsWith("FreenEnd")) {
          structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd");
        }

        if (structure.glycanst.startsWith("FreeEnd?")) {
          structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?");
        }

        if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) {
          continue;
        }

        System.out.println(structure.getGlycanst());

        // NOTE(review): the renderer is never used below; the workspace is presumably created
        // for initialisation needed by Glycan.fromString - confirm before removing.
        BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT());
        workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text
        GlycanRenderer renderer = workspace.getGlycanRenderer();
        org.eurocarbdb.application.glycanbuilder.Glycan glycan =
            org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst.trim());
        if (glycan != null) {
          String ct = glycan.toGlycoCTCondensed();
          System.out.println("this was the ct: " + ct);
          GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct");
          SparqlEntity se = new SparqlEntity();
          ct = StringUtils.chomp(ct);
          se.setValue(
              GlycoSequenceToWurcsSelectSparql.FromSequence,
              ct.replaceAll("\n", "\\\\n")
                  .replaceAll("x\\(", "u\\(")
                  .replaceAll("\\)x", "\\)u")
                  .trim());
          s.setSparqlEntity(se);
          logger.debug(s.getSparql());

          // Build the cleaned query text once and reuse it for parsing and for the trace output
          String queryText = s.getSparql().replaceAll("null", "").replace("?Sequence", "");
          Query query = QueryFactory.create(queryText);
          System.out.println("Id " + structure.id + " Query: " + queryText);
          QueryExecution qe =
              QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query);

          List<SparqlEntity> results = new ArrayList<>();

          try {
            ResultSet rs = qe.execSelect();

            while (rs.hasNext()) {
              QuerySolution row = rs.next();
              Iterator<String> columns = row.varNames();
              // One entity per result row; writing into `se` would overwrite the input entity
              // and make every element of `results` the same object.
              SparqlEntity se2 = new SparqlEntity();
              while (columns.hasNext()) {
                String column = columns.next();
                RDFNode cell = row.get(column);

                if (cell.isResource()) {
                  Resource resource = cell.asResource();
                  // do something maybe with the OntModel???
                  if (resource.isLiteral()) se2.setValue(column, resource.asLiteral().getString());
                  else se2.setValue(column, resource.toString());
                } else if (cell.isLiteral()) {
                  se2.setValue(column, cell.asLiteral().getString());
                } else if (cell.isAnon()) {
                  se2.setValue(column, "anon");
                } else {
                  se2.setValue(column, cell.toString());
                }
              }
              results.add(se2);
            }
          } finally {
            // Release the connection to the endpoint even when reading the results fails
            qe.close();
          }

          for (SparqlEntity entity : results) {
            resultList.add(structure.id + "\t" + entity.getValue("PrimaryId").toString());
          }
        }
      }
    }

    for (String c : resultList) {
      System.out.println(c);
    }

    // try-with-resources flushes and closes the writer; without it the buffered output may
    // never reach the file, and a failed open would leave a null writer to be dereferenced.
    try (PrintWriter writer =
        new PrintWriter(
            new OutputStreamWriter(new FileOutputStream("/tmp/HashSet.txt"), "UTF-8"))) {
      for (String c : resultList) {
        writer.println(c);
      }
    } catch (UnsupportedEncodingException | FileNotFoundException e) {
      logger.error("Could not write results to /tmp/HashSet.txt", e);
    }
  }
  /**
   * Get JSON map for a given resource by applying the river settings
   *
   * <p>For every property, the values found in the model are rendered with {@code
   * getStringForResult}, filtered through the white and black lists, normalized through {@code
   * normalizeObj}, and stored under the property name (or its {@code normalizeProp}
   * replacement). Values that end up under the same key are merged. Keys listed in {@code
   * normalizeMissing} that are still absent receive their default value.
   *
   * @param rs resource being processed
   * @param properties properties to be indexed
   * @param model model returned by the indexing query
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   * @return map of properties to be indexed for res
   */
  private Map<String, ArrayList<String>> getJsonMap(
      Resource rs, Set<Property> properties, Model model, boolean getPropLabel) {
    Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>();

    if (addUriForResource) {
      ArrayList<String> about = new ArrayList<String>();
      about.add("\"" + rs.toString() + "\"");
      jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", about);
    }

    Set<String> rdfLanguages = new HashSet<String>();

    for (Property prop : properties) {
      NodeIterator niter = model.listObjectsOfProperty(rs, prop);
      String property = prop.toString();
      ArrayList<String> results = new ArrayList<String>();

      while (niter.hasNext()) {
        RDFNode node = niter.next();
        String currValue = getStringForResult(node, getPropLabel);
        if (addLanguage && node.isLiteral()) {
          String lang = node.asLiteral().getLanguage();
          if (!lang.isEmpty()) {
            rdfLanguages.add("\"" + lang + "\"");
          }
        }

        // Unquote string. Only values that really are wrapped in double quotes are stripped;
        // numbers and booleans are rendered without quotes and must be compared as they are.
        String shortValue = currValue;
        int currLen = currValue.length();
        if (currLen > 1 && currValue.charAt(0) == '"' && currValue.charAt(currLen - 1) == '"') {
          shortValue = currValue.substring(1, currLen - 1);
        }

        // If the property has a white list that does not contain shortValue,
        // or a black list that does contain it,
        // skip adding the value to the index
        boolean whiteMapCond =
            whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue);
        boolean blackMapCond =
            blackMap.containsKey(property) && blackMap.get(property).contains(shortValue);

        if (whiteMapCond || blackMapCond) {
          continue;
        }

        if (normalizeObj.containsKey(shortValue)) {
          results.add("\"" + normalizeObj.get(shortValue) + "\"");
        } else {
          results.add(currValue);
        }
      }

      // Do not index empty properties
      if (results.isEmpty()) continue;

      if (normalizeProp.containsKey(property)) {
        property = normalizeProp.get(property);
      }

      // Always merge into an existing entry: a property renamed by normalizeProp may share its
      // key with another property, and a plain put would silently drop the earlier values.
      if (jsonMap.containsKey(property)) {
        jsonMap.get(property).addAll(results);
      } else {
        jsonMap.put(property, results);
      }
    }

    if (addLanguage) {
      if (rdfLanguages.isEmpty() && !language.isEmpty()) rdfLanguages.add(language);
      if (!rdfLanguages.isEmpty()) jsonMap.put("language", new ArrayList<String>(rdfLanguages));
    }

    for (Map.Entry<String, String> it : normalizeMissing.entrySet()) {
      if (!jsonMap.containsKey(it.getKey())) {
        ArrayList<String> res = new ArrayList<String>();
        res.add("\"" + it.getValue() + "\"");
        jsonMap.put(it.getKey(), res);
      }
    }

    return jsonMap;
  }