@Test public void testKBtoWurcsSparql() throws SparqlException { List<Structure> structures = Ebean.find(Structure.class).findList(); HashSet<String> resultList = new HashSet<>(); String ct = ""; for (Structure structure : structures) { if (structure.id >= 7400) { if (structure.glycanst.startsWith("v--")) { structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--"); } if (structure.glycanst.startsWith("FreenEnd")) { structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd"); } if (structure.glycanst.startsWith("FreeEnd?")) { structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?"); } if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) { continue; } System.out.println(structure.getGlycanst()); BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT()); workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text GlycanRenderer renderer = workspace.getGlycanRenderer(); org.eurocarbdb.application.glycanbuilder.Glycan glycan = org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst.trim()); if (glycan != null) { ct = glycan.toGlycoCTCondensed(); System.out.println("this was the ct: " + ct); GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct"); SparqlEntity se = new SparqlEntity(); ct = StringUtils.chomp(ct); se.setValue( GlycoSequenceToWurcsSelectSparql.FromSequence, ct.replaceAll("\n", "\\\\n") .replaceAll("x\\(", "u\\(") .replaceAll("\\)x", "\\)u") .trim()); s.setSparqlEntity(se); logger.debug(s.getSparql()); Query query = QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", "")); System.out.println( "Id " + structure.id + " Query: " + s.getSparql().replaceAll("null", "").replace("?Sequence", "")); QueryExecution qe = QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query); ResultSet rs = qe.execSelect(); List<SparqlEntity> results = new ArrayList<>(); HashSet<String> resultsList = new HashSet<>(); while (rs.hasNext()) { QuerySolution row = rs.next(); Iterator<String> columns = row.varNames(); SparqlEntity se2 = new SparqlEntity(); while (columns.hasNext()) { String column = columns.next(); RDFNode cell = row.get(column); if (cell.isResource()) { Resource resource = cell.asResource(); // do something maybe with the OntModel??? if (resource.isLiteral()) se.setValue(column, resource.asLiteral().getString()); else se.setValue(column, resource.toString()); } else if (cell.isLiteral()) { se.setValue(column, cell.asLiteral().getString()); } else if (cell.isAnon()) { se.setValue(column, "anon"); } else { se.setValue(column, cell.toString()); } } results.add(se); } for (SparqlEntity entity : results) { // System.out.println("results: " + entity.getValue("PrimaryId")); resultList.add(structure.id + "\t" + entity.getValue("PrimaryId").toString()); } } } } PrintWriter writer = null; try { writer = new PrintWriter( new OutputStreamWriter(new FileOutputStream("/tmp/HashSet.txt"), "UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } for (String c : resultList) { System.out.println(c); writer.println(c); } }
@Test public void testGlycotoucanCTSearch() { List<Structure> structures = Ebean.find(Structure.class).findList(); String ct = ""; for (Structure structure : structures) { if (structure.id >= 7400) { if (structure.glycanst.startsWith("v--")) { structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--"); } if (structure.glycanst.startsWith("FreenEnd")) { structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd"); } if (structure.glycanst.startsWith("FreeEnd?")) { structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?"); } if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) { continue; } System.out.println(structure.getGlycanst()); BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT()); workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text GlycanRenderer renderer = workspace.getGlycanRenderer(); org.eurocarbdb.application.glycanbuilder.Glycan glycan = org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst); ct = glycan.toGlycoCTCondensed(); // System.out.println(ct); // } // } /*String ct = "RES\\n" + "1b:a-dgal-HEX-1:5\\n" + "2s:n-acetyl\\n" + "3b:b-dgal-HEX-1:5\\n" + "4b:a-lgal-HEX-1:5|6:d\\n" + "5b:a-dgal-HEX-1:5\\n" + "6s:n-acetyl\\n" + "7b:b-dglc-HEX-1:5\\n" + "8s:n-acetyl\\n" + "LIN\\n" + "1:1d(2+1)2n\\n" + "2:1o(3+1)3d\\n" + "3:3o(2+1)4d\\n" + "4:3o(3+1)5d\\n" + "5:5d(2+1)6n\\n" + "6:1o(6+1)7d\\n" + "7:7d(2+1)8n"; ct = "RES\n" + "1b:b-dglc-HEX-1:5\n" + "2s:n-acetyl\n" + "3b:b-dglc-HEX-1:5\n" + "4s:n-acetyl\n" + "5b:b-dman-HEX-1:5\n" + "6b:a-dman-HEX-1:5\n" + "7b:a-dman-HEX-1:5\n" + "8b:a-lgal-HEX-1:5|6:d\n" + "LIN\n" + "1:1d(2+1)2n\n" + "2:1o(4+1)3d\n" + "3:3d(2+1)4n\n" + "4:3o(4+1)5d\n" + "5:5o(3+1)6d\n" + "6:5o(6+1)7d\n" + "7:1o(6+1)8d\n" + "UND\n" + "UND1:100.0:100.0\n" + "ParentIDs:1|3|5|6|7|8\n" + "SubtreeLinkageID1:x(-1+1)x\n" + "RES\n" + "9b:b-dglc-HEX-1:5\n" + "10s:n-acetyl\n" + "11b:a-lgal-HEX-1:5|6:d\n" + "12b:b-dgal-HEX-1:5\n" + "13b:a-dgro-dgal-NON-2:6|1:a|2:keto|3:d\n" + "14s:n-acetyl\n" + "LIN\n" + "8:9d(2+1)10n\n" + "9:9o(3+1)11d\n" + "10:9o(4+1)12d\n" + "11:12o(-1+2)13d\n" + "12:13d(5+1)14n\n" + "UND2:100.0:100.0\n" + "ParentIDs:1|3|5|6|7|8\n" + "SubtreeLinkageID1:x(-1+1)x\n" + "RES\n" + "15b:b-dglc-HEX-1:5\n" + "16s:n-acetyl\n" + "17b:a-lgal-HEX-1:5|6:d\n" + "18b:b-dgal-HEX-1:5\n" + "LIN\n" + "13:15d(2+1)16n\n" + "14:15o(3+1)17d\n" + "15:15o(4+1)18d\n" + "UND3:100.0:100.0\n" + "ParentIDs:1|3|5|6|7|8\n" + "SubtreeLinkageID1:x(-1+1)x\n" + "RES\n" + "19b:b-dglc-HEX-1:5\n" + "20s:n-acetyl\n" + "21b:b-dgal-HEX-1:5\n" + "LIN\n" + "16:19d(2+1)20n\n" + "17:19o(4+1)21d"; */ ct = ct.replaceAll("\n", "\\\\n").replaceAll("x\\(", "u\\(").replaceAll("\\)x", "\\)u"); System.out.println("new ct: " + ct); String queryString = "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" + "PREFIX glycan: <http://purl.jp/bio/12/glyco/glycan#>\n" + "PREFIX wurcs: <http://www.glycoinfo.org/glyco/owl/wurcs#>\n" + "SELECT DISTINCT ?glycan ?c\n" + "# FROM <http://rdf.glycoinfo.org/wurcs/0.5.0>\n" + "# FROM <http://rdf.glycoinfo.org/wurcs/0.5.0/ms>\n" + "WHERE {\n" + " ?glycan a \tglycan:glycosequence ;\n" + "\tglycan:in_carbohydrate_format glycan:carbohydrate_format_glycoct ;\n" + "\tglycan:has_sequence\n" + "\t\t?c filter(contains(?c, \"RES\\n1b:b-dglc-HEX-1\")) .\n" + // "\t\t?c filter(contains(?c, \"" + ct + "\" )) .\n" + "\n" + " }\n" + " ORDER BY ?glycan\n" + "limit 10"; System.out.println("String: " + queryString + "\t\tID: " + structure.id); Query query = QueryFactory.create(queryString); QueryExecution qExe = QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query); ResultSet results = qExe.execSelect(); ResultSetFormatter.out(System.out, results, query); } } }