/** * Builds a String result for Elastic Search from an RDFNode * * @param node An RDFNode representing the value of a property for a given resource * @return If the RDFNode has a Literal value, among Boolean, Byte, Double, Float, Integer Long, * Short, this value is returned, converted to String * <p>If the RDFNode has a String Literal value, this value will be returned, surrounded by * double quotes * <p>If the RDFNode has a Resource value (URI) and toDescribeURIs is set to true, the value * of @getLabelForUri for the resource is returned, surrounded by double quotes. Otherwise, * the URI will be returned */ private String getStringForResult(RDFNode node, boolean getNodeLabel) { String result = ""; boolean quote = false; if (node.isLiteral()) { Object literalValue = node.asLiteral().getValue(); try { Class<?> literalJavaClass = node.asLiteral().getDatatype().getJavaClass(); if (literalJavaClass.equals(Boolean.class) || Number.class.isAssignableFrom(literalJavaClass)) { result += literalValue; } else { result = EEASettings.parseForJson(node.asLiteral().getLexicalForm()); quote = true; } } catch (java.lang.NullPointerException npe) { result = EEASettings.parseForJson(node.asLiteral().getLexicalForm()); quote = true; } } else if (node.isResource()) { result = node.asResource().getURI(); if (getNodeLabel) { result = getLabelForUri(result); } quote = true; } if (quote) { result = "\"" + result + "\""; } return result; }
/** * Query SPARQL endpoint with a SELECT query * * @param qExec QueryExecution encapsulating the query * @return model retrieved by querying the endpoint */ private Model getSelectModel(QueryExecution qExec) { Model model = ModelFactory.createDefaultModel(); Graph graph = model.getGraph(); ResultSet results = qExec.execSelect(); while (results.hasNext()) { QuerySolution sol = results.next(); String subject; String predicate; RDFNode object; try { subject = sol.getResource("s").toString(); predicate = sol.getResource("p").toString(); object = sol.get("o"); } catch (NoSuchElementException e) { logger.error("SELECT query does not return a (?s ?p ?o) Triple"); continue; } Node objNode; if (object.isLiteral()) { Literal obj = object.asLiteral(); objNode = NodeFactory.createLiteral(obj.getString(), obj.getDatatype()); } else { objNode = NodeFactory.createLiteral(object.toString()); } graph.add( new Triple(NodeFactory.createURI(subject), NodeFactory.createURI(predicate), objNode)); } return model; }
@Test public void testSelectToWurcsSparql() throws SparqlException, UnsupportedEncodingException { GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct"); SparqlEntity se = new SparqlEntity(); se.setValue( GlycoSequenceToWurcsSelectSparql.FromSequence, "RES\n1b:a-dgal-HEX-1:5\n2s:n-acetyl\n3b:b-dgal-HEX-1:5\n4b:b-dglc-HEX-1:5\n5s:n-acetyl\n6b:b-dgal-HEX-1:5\n7b:a-lgal-HEX-1:5|6:d\n8b:b-dglc-HEX-1:5\n9s:n-acetyl\n10b:b-dglc-HEX-1:5\n11s:n-acetyl\n12b:b-dgal-HEX-1:5\n13b:a-lgal-HEX-1:5|6:d\nLIN\n1:1d(2+1)2n\n2:1o(3+1)3d\n3:3o(3+1)4d\n4:4d(2+1)5n\n5:4o(4+1)6d\n6:6o(2+1)7d\n7:3o(6+1)8d\n8:8d(2+1)9n\n9:1o(6+1)10d\n10:10d(2+1)11n\n11:10o(4+1)12d\n12:12o(2+1)13d" .replaceAll("\n", "\\\\n")); s.setSparqlEntity(se); logger.debug(s.getSparql()); Query query = QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", "")); // QueryExecution qe = // QueryExecutionFactory.sparqlService("http://localhost:3030/glycobase/query",query); QueryExecution qe = QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query); ResultSet rs = qe.execSelect(); List<SparqlEntity> results = new ArrayList<SparqlEntity>(); while (rs.hasNext()) { QuerySolution row = rs.next(); Iterator<String> columns = row.varNames(); SparqlEntity se2 = new SparqlEntity(); while (columns.hasNext()) { String column = columns.next(); RDFNode cell = row.get(column); if (cell.isResource()) { Resource resource = cell.asResource(); // do something maybe with the OntModel??? if (resource.isLiteral()) se.setValue(column, resource.asLiteral().getString()); else se.setValue(column, resource.toString()); } else if (cell.isLiteral()) { se.setValue(column, cell.asLiteral().getString()); } else if (cell.isAnon()) { se.setValue(column, "anon"); } else { se.setValue(column, cell.toString()); } } results.add(se); } for (SparqlEntity entity : results) { System.out.println("results: " + entity.getValue("PrimaryId")); } }
public int[] getSourceColumns(Resource resource) { if (_model.contains(resource, Vertere.source_column)) { Statement sourceColumn = _model.getProperty(resource, Vertere.source_column); return new int[] {sourceColumn.getInt()}; } else if (_model.contains(resource, Vertere.source_columns)) { Statement sourceColumns = _model.getProperty(resource, Vertere.source_columns); Resource listResource = sourceColumns.getResource(); RDFList list = listResource.as(RDFList.class); List<RDFNode> javalist = list.asJavaList(); int[] sourceColumnNumbers = new int[javalist.size()]; for (int i = 0; i < javalist.size(); i++) { RDFNode node = javalist.get(i); Literal value = node.asLiteral(); sourceColumnNumbers[i] = value.getInt(); } return sourceColumnNumbers; } else { return new int[0]; } }
@Override public EntityDefinition open(Assembler a, Resource root, Mode mode) { String prologue = "PREFIX : <" + NS + "> PREFIX list: <http://jena.apache.org/ARQ/list#> "; Model model = root.getModel(); String qs1 = StrUtils.strjoinNL( prologue, "SELECT * {", " ?eMap :entityField ?entityField ;", " :map ?map ;", " :defaultField ?dftField .", " OPTIONAL {", " ?eMap :graphField ?graphField", " }", " OPTIONAL {", " ?eMap :langField ?langField", " }", " OPTIONAL {", " ?eMap :uidField ?uidField", " }", "}"); ParameterizedSparqlString pss = new ParameterizedSparqlString(qs1); pss.setIri("eMap", root.getURI()); Query query1 = QueryFactory.create(pss.toString()); QueryExecution qexec1 = QueryExecutionFactory.create(query1, model); ResultSet rs1 = qexec1.execSelect(); List<QuerySolution> results = ResultSetFormatter.toList(rs1); if (results.size() == 0) { Log.warn(this, "Failed to find a valid EntityMap for : " + root); throw new TextIndexException("Failed to find a valid EntityMap for : " + root); } if (results.size() != 1) { Log.warn(this, "Multiple matches for EntityMap for : " + root); throw new TextIndexException("Multiple matches for EntityMap for : " + root); } QuerySolution qsol1 = results.get(0); String entityField = qsol1.getLiteral("entityField").getLexicalForm(); String graphField = qsol1.contains("graphField") ? qsol1.getLiteral("graphField").getLexicalForm() : null; String langField = qsol1.contains("langField") ? qsol1.getLiteral("langField").getLexicalForm() : null; String defaultField = qsol1.contains("dftField") ? qsol1.getLiteral("dftField").getLexicalForm() : null; String uniqueIdField = qsol1.contains("uidField") ? qsol1.getLiteral("uidField").getLexicalForm() : null; Multimap<String, Node> mapDefs = HashMultimap.create(); Map<String, Analyzer> analyzerDefs = new HashMap<>(); Statement listStmt = root.getProperty(TextVocab.pMap); while (listStmt != null) { RDFNode n = listStmt.getObject(); if (!n.isResource()) { throw new TextIndexException("Text list node is not a resource : " + n); } Resource listResource = n.asResource(); if (listResource.equals(RDF.nil)) { break; // end of the list } Statement listEntryStmt = listResource.getProperty(RDF.first); if (listEntryStmt == null) { throw new TextIndexException("Text map list is not well formed. No rdf:first property"); } n = listEntryStmt.getObject(); if (!n.isResource()) { throw new TextIndexException("Text map list entry is not a resource : " + n); } Resource listEntry = n.asResource(); Statement fieldStatement = listEntry.getProperty(TextVocab.pField); if (fieldStatement == null) { throw new TextIndexException("Text map entry has no field property"); } n = fieldStatement.getObject(); if (!n.isLiteral()) { throw new TextIndexException("Text map entry field property has no literal value : " + n); } String field = n.asLiteral().getLexicalForm(); Statement predicateStatement = listEntry.getProperty(TextVocab.pPredicate); if (predicateStatement == null) { throw new TextIndexException("Text map entry has no predicate property"); } n = predicateStatement.getObject(); if (!n.isURIResource()) { throw new TextIndexException( "Text map entry predicate property has non resource value : " + n); } mapDefs.put(field, n.asNode()); Statement analyzerStatement = listEntry.getProperty(TextVocab.pAnalyzer); if (analyzerStatement != null) { n = analyzerStatement.getObject(); if (!n.isResource()) { throw new TextIndexException("Text map entry analyzer property is not a resource : " + n); } Resource analyzerResource = n.asResource(); Analyzer analyzer = (Analyzer) a.open(analyzerResource); analyzerDefs.put(field, analyzer); } // move on to the next element in the list listStmt = listResource.getProperty(RDF.rest); } // Primary field/predicate if (defaultField != null) { Collection<Node> c = mapDefs.get(defaultField); if (c.isEmpty()) throw new TextIndexException("No definition of primary field '" + defaultField + "'"); } EntityDefinition docDef = new EntityDefinition(entityField, defaultField); docDef.setGraphField(graphField); docDef.setLangField(langField); docDef.setUidField(uniqueIdField); for (String f : mapDefs.keys()) { for (Node p : mapDefs.get(f)) docDef.set(f, p); } for (String f : analyzerDefs.keySet()) { docDef.setAnalyzer(f, analyzerDefs.get(f)); } return docDef; }
@Test public void testKBtoWurcsSparqlTranslation() throws SparqlException { List<Translation> translations = Ebean.find(Translation.class).findList(); HashSet<String> resultList = new HashSet<>(); String ct = ""; for (Translation translation : translations) { System.out.println("id check " + translation.id + " ct " + translation.ct); if (translation.ct == null) continue; if (translation.structure.id > 0) { ct = translation.ct; GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct"); SparqlEntity se = new SparqlEntity(); ct = StringUtils.chomp(ct); System.out.println("ct on top: " + ct); if (ct != null) { se.setValue( GlycoSequenceToWurcsSelectSparql.FromSequence, ct.replaceAll("\n", "\\\\n") .replaceAll("x\\(", "u\\(") .replaceAll("\\)x", "\\)u") .trim()); s.setSparqlEntity(se); logger.debug(s.getSparql()); Query query = QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", "")); System.out.println( "Id " + translation.structure.id + " Query: " + s.getSparql().replaceAll("null", "").replace("?Sequence", "")); QueryExecution qe = QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query); ResultSet rs = qe.execSelect(); List<SparqlEntity> results = new ArrayList<>(); HashSet<String> resultsList = new HashSet<>(); while (rs.hasNext()) { QuerySolution row = rs.next(); Iterator<String> columns = row.varNames(); SparqlEntity se2 = new SparqlEntity(); while (columns.hasNext()) { String column = columns.next(); RDFNode cell = row.get(column); if (cell.isResource()) { Resource resource = cell.asResource(); // do something maybe with the OntModel??? if (resource.isLiteral()) se.setValue(column, resource.asLiteral().getString()); else se.setValue(column, resource.toString()); } else if (cell.isLiteral()) { se.setValue(column, cell.asLiteral().getString()); } else if (cell.isAnon()) { se.setValue(column, "anon"); } else { se.setValue(column, cell.toString()); } } results.add(se); } for (SparqlEntity entity : results) { // System.out.println("results: " + entity.getValue("PrimaryId")); resultList.add( translation.structure.id + "\t" + entity.getValue("PrimaryId").toString()); } } } } for (String c : resultList) { System.out.println(c); } }
@Test public void testKBtoWurcsSparql() throws SparqlException { List<Structure> structures = Ebean.find(Structure.class).findList(); HashSet<String> resultList = new HashSet<>(); String ct = ""; for (Structure structure : structures) { if (structure.id >= 7400) { if (structure.glycanst.startsWith("v--")) { structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--"); } if (structure.glycanst.startsWith("FreenEnd")) { structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd"); } if (structure.glycanst.startsWith("FreeEnd?")) { structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?"); } if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) { continue; } System.out.println(structure.getGlycanst()); BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT()); workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text GlycanRenderer renderer = workspace.getGlycanRenderer(); org.eurocarbdb.application.glycanbuilder.Glycan glycan = org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst.trim()); if (glycan != null) { ct = glycan.toGlycoCTCondensed(); System.out.println("this was the ct: " + ct); GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct"); SparqlEntity se = new SparqlEntity(); ct = StringUtils.chomp(ct); se.setValue( GlycoSequenceToWurcsSelectSparql.FromSequence, ct.replaceAll("\n", "\\\\n") .replaceAll("x\\(", "u\\(") .replaceAll("\\)x", "\\)u") .trim()); s.setSparqlEntity(se); logger.debug(s.getSparql()); Query query = QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", "")); System.out.println( "Id " + structure.id + " Query: " + s.getSparql().replaceAll("null", "").replace("?Sequence", "")); QueryExecution qe = QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query); ResultSet rs = qe.execSelect(); List<SparqlEntity> results = new ArrayList<>(); HashSet<String> resultsList = new HashSet<>(); while (rs.hasNext()) { QuerySolution row = rs.next(); Iterator<String> columns = row.varNames(); SparqlEntity se2 = new SparqlEntity(); while (columns.hasNext()) { String column = columns.next(); RDFNode cell = row.get(column); if (cell.isResource()) { Resource resource = cell.asResource(); // do something maybe with the OntModel??? if (resource.isLiteral()) se.setValue(column, resource.asLiteral().getString()); else se.setValue(column, resource.toString()); } else if (cell.isLiteral()) { se.setValue(column, cell.asLiteral().getString()); } else if (cell.isAnon()) { se.setValue(column, "anon"); } else { se.setValue(column, cell.toString()); } } results.add(se); } for (SparqlEntity entity : results) { // System.out.println("results: " + entity.getValue("PrimaryId")); resultList.add(structure.id + "\t" + entity.getValue("PrimaryId").toString()); } } } } PrintWriter writer = null; try { writer = new PrintWriter( new OutputStreamWriter(new FileOutputStream("/tmp/HashSet.txt"), "UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } for (String c : resultList) { System.out.println(c); writer.println(c); } }
/** * Get JSON map for a given resource by applying the river settings * * @param rs resource being processed * @param properties properties to be indexed * @param model model returned by the indexing query * @param getPropLabel if set to true all URI property values will be indexed as their label. The * label is taken as the value of one of the properties set in {@link #uriDescriptionList}. * @return map of properties to be indexed for res */ private Map<String, ArrayList<String>> getJsonMap( Resource rs, Set<Property> properties, Model model, boolean getPropLabel) { Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>(); ArrayList<String> results = new ArrayList<String>(); if (addUriForResource) { results.add("\"" + rs.toString() + "\""); jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", results); } Set<String> rdfLanguages = new HashSet<String>(); for (Property prop : properties) { NodeIterator niter = model.listObjectsOfProperty(rs, prop); String property = prop.toString(); results = new ArrayList<String>(); String lang; String currValue; while (niter.hasNext()) { RDFNode node = niter.next(); currValue = getStringForResult(node, getPropLabel); if (addLanguage) { if (node.isLiteral()) { lang = node.asLiteral().getLanguage(); if (!lang.isEmpty()) { rdfLanguages.add("\"" + lang + "\""); } } } String shortValue = currValue; int currLen = currValue.length(); // Unquote string if (currLen > 1) shortValue = currValue.substring(1, currLen - 1); // If either whiteMap does contains shortValue // or blackMap contains the value // skip adding it to the index boolean whiteMapCond = whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue); boolean blackMapCond = blackMap.containsKey(property) && blackMap.get(property).contains(shortValue); if (whiteMapCond || blackMapCond) { continue; } if (normalizeObj.containsKey(shortValue)) { results.add("\"" + normalizeObj.get(shortValue) + "\""); } else { results.add(currValue); } } // Do not index empty properties if (results.isEmpty()) continue; if (normalizeProp.containsKey(property)) { property = normalizeProp.get(property); if (jsonMap.containsKey(property)) { jsonMap.get(property).addAll(results); } else { jsonMap.put(property, results); } } else { jsonMap.put(property, results); } } if (addLanguage) { if (rdfLanguages.isEmpty() && !language.isEmpty()) rdfLanguages.add(language); if (!rdfLanguages.isEmpty()) jsonMap.put("language", new ArrayList<String>(rdfLanguages)); } for (Map.Entry<String, String> it : normalizeMissing.entrySet()) { if (!jsonMap.containsKey(it.getKey())) { ArrayList<String> res = new ArrayList<String>(); res.add("\"" + it.getValue() + "\""); jsonMap.put(it.getKey(), res); } } return jsonMap; }