Пример #1
0
  @Test
  public void testKBtoWurcsSparql() throws SparqlException {

    List<Structure> structures = Ebean.find(Structure.class).findList();
    HashSet<String> resultList = new HashSet<>();

    String ct = "";

    for (Structure structure : structures) {
      if (structure.id >= 7400) {

        if (structure.glycanst.startsWith("v--")) {
          structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--");
        }

        if (structure.glycanst.startsWith("FreenEnd")) {
          structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd");
        }

        if (structure.glycanst.startsWith("FreeEnd?")) {
          structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?");
        }

        if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) {
          continue;
        }

        System.out.println(structure.getGlycanst());

        BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT());
        workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text
        GlycanRenderer renderer = workspace.getGlycanRenderer();
        org.eurocarbdb.application.glycanbuilder.Glycan glycan =
            org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst.trim());
        if (glycan != null) {
          ct = glycan.toGlycoCTCondensed();
          System.out.println("this was the ct: " + ct);
          GlycoSequenceToWurcsSelectSparql s = new GlycoSequenceToWurcsSelectSparql("glycoct");
          SparqlEntity se = new SparqlEntity();
          ct = StringUtils.chomp(ct);
          se.setValue(
              GlycoSequenceToWurcsSelectSparql.FromSequence,
              ct.replaceAll("\n", "\\\\n")
                  .replaceAll("x\\(", "u\\(")
                  .replaceAll("\\)x", "\\)u")
                  .trim());
          s.setSparqlEntity(se);
          logger.debug(s.getSparql());

          Query query =
              QueryFactory.create(s.getSparql().replaceAll("null", "").replace("?Sequence", ""));
          System.out.println(
              "Id "
                  + structure.id
                  + " Query: "
                  + s.getSparql().replaceAll("null", "").replace("?Sequence", ""));
          QueryExecution qe =
              QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query);
          ResultSet rs = qe.execSelect();

          List<SparqlEntity> results = new ArrayList<>();
          HashSet<String> resultsList = new HashSet<>();

          while (rs.hasNext()) {
            QuerySolution row = rs.next();
            Iterator<String> columns = row.varNames();
            SparqlEntity se2 = new SparqlEntity();
            while (columns.hasNext()) {
              String column = columns.next();
              RDFNode cell = row.get(column);

              if (cell.isResource()) {
                Resource resource = cell.asResource();
                // do something maybe with the OntModel???
                if (resource.isLiteral()) se.setValue(column, resource.asLiteral().getString());
                else se.setValue(column, resource.toString());
              } else if (cell.isLiteral()) {
                se.setValue(column, cell.asLiteral().getString());
              } else if (cell.isAnon()) {
                se.setValue(column, "anon");
              } else {
                se.setValue(column, cell.toString());
              }
            }
            results.add(se);
          }

          for (SparqlEntity entity : results) {
            // System.out.println("results: " + entity.getValue("PrimaryId"));
            resultList.add(structure.id + "\t" + entity.getValue("PrimaryId").toString());
          }
        }
      }
    }
    PrintWriter writer = null;
    try {
      writer =
          new PrintWriter(
              new OutputStreamWriter(new FileOutputStream("/tmp/HashSet.txt"), "UTF-8"));
    } catch (UnsupportedEncodingException e) {
      e.printStackTrace();
    } catch (FileNotFoundException e) {
      e.printStackTrace();
    }
    for (String c : resultList) {
      System.out.println(c);
      writer.println(c);
    }
  }
Пример #2
0
  @Test
  public void testGlycotoucanCTSearch() {

    List<Structure> structures = Ebean.find(Structure.class).findList();
    String ct = "";

    for (Structure structure : structures) {
      if (structure.id >= 7400) {

        if (structure.glycanst.startsWith("v--")) {
          structure.glycanst = structure.glycanst.replace("v--", "FreeEnd--");
        }

        if (structure.glycanst.startsWith("FreenEnd")) {
          structure.glycanst = structure.glycanst.replace("FreenEnd", "FreeEnd");
        }

        if (structure.glycanst.startsWith("FreeEnd?")) {
          structure.glycanst = structure.glycanst.replace("FreeEnd?", "FreeEnd--?");
        }

        if (structure.glycanst.startsWith("<Gly") || structure.glycanst.contains("0.0000u")) {
          continue;
        }

        System.out.println(structure.getGlycanst());

        BuilderWorkspace workspace = new BuilderWorkspace(new GlycanRendererAWT());
        workspace.setNotation("cfg"); // cfgbw | uoxf | uoxfcol | text

        GlycanRenderer renderer = workspace.getGlycanRenderer();

        org.eurocarbdb.application.glycanbuilder.Glycan glycan =
            org.eurocarbdb.application.glycanbuilder.Glycan.fromString(structure.glycanst);
        ct = glycan.toGlycoCTCondensed();

        // System.out.println(ct);
        //  }
        // }

        /*String ct = "RES\\n" +
                "1b:a-dgal-HEX-1:5\\n" +
                "2s:n-acetyl\\n" +
                "3b:b-dgal-HEX-1:5\\n" +
                "4b:a-lgal-HEX-1:5|6:d\\n" +
                "5b:a-dgal-HEX-1:5\\n" +
                "6s:n-acetyl\\n" +
                "7b:b-dglc-HEX-1:5\\n" +
                "8s:n-acetyl\\n" +
                "LIN\\n" +
                "1:1d(2+1)2n\\n" +
                "2:1o(3+1)3d\\n" +
                "3:3o(2+1)4d\\n" +
                "4:3o(3+1)5d\\n" +
                "5:5d(2+1)6n\\n" +
                "6:1o(6+1)7d\\n" +
                "7:7d(2+1)8n";

        ct = "RES\n" +
                "1b:b-dglc-HEX-1:5\n" +
                "2s:n-acetyl\n" +
                "3b:b-dglc-HEX-1:5\n" +
                "4s:n-acetyl\n" +
                "5b:b-dman-HEX-1:5\n" +
                "6b:a-dman-HEX-1:5\n" +
                "7b:a-dman-HEX-1:5\n" +
                "8b:a-lgal-HEX-1:5|6:d\n" +
                "LIN\n" +
                "1:1d(2+1)2n\n" +
                "2:1o(4+1)3d\n" +
                "3:3d(2+1)4n\n" +
                "4:3o(4+1)5d\n" +
                "5:5o(3+1)6d\n" +
                "6:5o(6+1)7d\n" +
                "7:1o(6+1)8d\n" +
                "UND\n" +
                "UND1:100.0:100.0\n" +
                "ParentIDs:1|3|5|6|7|8\n" +
                "SubtreeLinkageID1:x(-1+1)x\n" +
                "RES\n" +
                "9b:b-dglc-HEX-1:5\n" +
                "10s:n-acetyl\n" +
                "11b:a-lgal-HEX-1:5|6:d\n" +
                "12b:b-dgal-HEX-1:5\n" +
                "13b:a-dgro-dgal-NON-2:6|1:a|2:keto|3:d\n" +
                "14s:n-acetyl\n" +
                "LIN\n" +
                "8:9d(2+1)10n\n" +
                "9:9o(3+1)11d\n" +
                "10:9o(4+1)12d\n" +
                "11:12o(-1+2)13d\n" +
                "12:13d(5+1)14n\n" +
                "UND2:100.0:100.0\n" +
                "ParentIDs:1|3|5|6|7|8\n" +
                "SubtreeLinkageID1:x(-1+1)x\n" +
                "RES\n" +
                "15b:b-dglc-HEX-1:5\n" +
                "16s:n-acetyl\n" +
                "17b:a-lgal-HEX-1:5|6:d\n" +
                "18b:b-dgal-HEX-1:5\n" +
                "LIN\n" +
                "13:15d(2+1)16n\n" +
                "14:15o(3+1)17d\n" +
                "15:15o(4+1)18d\n" +
                "UND3:100.0:100.0\n" +
                "ParentIDs:1|3|5|6|7|8\n" +
                "SubtreeLinkageID1:x(-1+1)x\n" +
                "RES\n" +
                "19b:b-dglc-HEX-1:5\n" +
                "20s:n-acetyl\n" +
                "21b:b-dgal-HEX-1:5\n" +
                "LIN\n" +
                "16:19d(2+1)20n\n" +
                "17:19o(4+1)21d";
                */

        ct = ct.replaceAll("\n", "\\\\n").replaceAll("x\\(", "u\\(").replaceAll("\\)x", "\\)u");
        System.out.println("new ct: " + ct);

        String queryString =
            "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n"
                + "PREFIX glycan: <http://purl.jp/bio/12/glyco/glycan#>\n"
                + "PREFIX wurcs: <http://www.glycoinfo.org/glyco/owl/wurcs#>\n"
                + "SELECT DISTINCT ?glycan ?c\n"
                + "# FROM <http://rdf.glycoinfo.org/wurcs/0.5.0>\n"
                + "# FROM <http://rdf.glycoinfo.org/wurcs/0.5.0/ms>\n"
                + "WHERE {\n"
                + "  ?glycan a \tglycan:glycosequence ;\n"
                + "\tglycan:in_carbohydrate_format  glycan:carbohydrate_format_glycoct ;\n"
                + "\tglycan:has_sequence\n"
                + "\t\t?c filter(contains(?c, \"RES\\n1b:b-dglc-HEX-1\")) .\n"
                +
                // "\t\t?c filter(contains(?c, \"" + ct + "\" )) .\n" +
                "\n"
                + "  }\n"
                + "  ORDER BY ?glycan\n"
                + "limit 10";

        System.out.println("String: " + queryString + "\t\tID: " + structure.id);

        Query query = QueryFactory.create(queryString);

        QueryExecution qExe =
            QueryExecutionFactory.sparqlService("http://test.ts.glytoucan.org/sparql", query);
        ResultSet results = qExe.execSelect();
        ResultSetFormatter.out(System.out, results, query);
      }
    }
  }