예제 #1
0
  /**
   * Loads the ChEBI Compounds resource and fills the accession lookup maps.
   *
   * <p>The resource is read as a tab-separated table whose header row must contain the columns
   * {@code CHEBI_ACCESSION}, {@code PARENT_ID} and {@code STATUS}. For every row whose accession
   * column contains a match for {@code (?:C[hH]EBI:)?(\d+)}, entries are stored under both the
   * bare number and the {@code CHEBI:}-prefixed form:
   *
   * <ul>
   *   <li>{@code childMap}: parent accession to {@code CHEBI:}-prefixed child accession
   *   <li>{@code parentMap}: child accession to {@code CHEBI:}-prefixed parent accession
   *   <li>{@code statusMap}: child accession to the first character of the status column
   * </ul>
   *
   * <p>A row whose parent column holds no accession number is recorded as its own parent.
   *
   * @throws IOException problem reading file, or the header row is absent or lacks a required
   *     column
   * @throws MissingLocationException if ChEBI Compounds resource location is missing
   */
  public void createMap() throws IOException, MissingLocationException {

    ResourceFileLocation location = getLocation("ChEBI Compounds");
    CSVReader csv = null;

    try {
      csv = new CSVReader(new InputStreamReader(location.open()), '\t');

      String[] headerRow = csv.readNext();
      if (headerRow == null) {
        throw new IOException("ChEBI Compounds resource is empty: no header row");
      }

      List<String> header = Arrays.asList(headerRow);
      int accessionIndex = header.indexOf("CHEBI_ACCESSION");
      int parentIndex = header.indexOf("PARENT_ID");
      int statusIndex = header.indexOf("STATUS");
      if (accessionIndex < 0 || parentIndex < 0 || statusIndex < 0) {
        // fail early with a readable message instead of indexing rows with -1 later on
        throw new IOException(
            "ChEBI Compounds header lacks one of CHEBI_ACCESSION, PARENT_ID, STATUS: " + header);
      }

      // optional "CHEBI:" / "ChEBI:" prefix followed by the numeric part, which is captured
      Pattern accessionPattern = Pattern.compile("(?:C[hH]EBI:)?(\\d+)");

      String[] row;
      while ((row = csv.readNext()) != null) {

        String accession = row[accessionIndex];
        String parent = row[parentIndex];
        Character status = row[statusIndex].charAt(0);

        Matcher accessionMatcher = accessionPattern.matcher(accession);
        Matcher parentMatcher = accessionPattern.matcher(parent);

        if (accessionMatcher.find()) {

          String childAcc = accessionMatcher.group(1);
          // no parent number in the row: the entry acts as its own parent
          String parentAcc = parentMatcher.find() ? parentMatcher.group(1) : childAcc;

          childMap.put(parentAcc, "CHEBI:" + childAcc);
          childMap.put("CHEBI:" + parentAcc, "CHEBI:" + childAcc);

          parentMap.put(childAcc, "CHEBI:" + parentAcc);
          parentMap.put("CHEBI:" + childAcc, "CHEBI:" + parentAcc);

          statusMap.put(childAcc, status);
          statusMap.put("CHEBI:" + childAcc, status);
        }
      }
    } finally {
      // release both resources even when reading failed part-way through
      try {
        location.close();
      } finally {
        if (csv != null) {
          csv.close();
        }
      }
    }
  }
예제 #2
0
  /**
   * Rebuilds the reaction database from the "KEGG Reaction" resource.
   *
   * <p>The reaction schema is created on the HSQLDB connection, then the ENTRY, EQUATION and ENZYME
   * fields of each record are parsed. Reactions (accession plus the first whitespace-separated
   * token of the ENZYME field, or an empty string) and distinct compound accessions are gathered
   * into two multi-row inserts. After those are executed, one row per participant is inserted
   * into REACTANT (left of {@code <=>}) or PRODUCT (right of {@code <=>}), resolving the compound
   * and reaction ids through sub-selects on their accessions.
   *
   * <p>Records without an EQUATION field, or whose equation does not yield two sides when split
   * on {@code <=>}, are skipped. When the task is cancelled parsing stops, but everything
   * gathered up to that point is still inserted. The connection is committed and closed in all
   * cases; failures of commit or close are reported on {@code System.err} only.
   *
   * @throws IOException if the resource cannot be read, or wrapping an {@link SQLException}
   *     raised while writing to the database
   */
  @Override
  public void update() throws IOException {
    ResourceFileLocation location = getLocation("KEGG Reaction");
    HSQLDBLocation connection = connection();
    try {
      Hsqldb.createReactionSchema(connection.getConnection());
      DSLContext create = DSL.using(connection.getConnection(), HSQLDB);

      // tracks accessions already queued so each compound is inserted once
      Set<String> compoundIds = Sets.newHashSetWithExpectedSize(10000);

      InsertValuesStep2<?, String, String> reactionInsert =
          create.insertInto(REACTION, REACTION.ACCESSION, REACTION.EC);
      InsertValuesStep1<?, String> compoundInsert = create.insertInto(COMPOUND, COMPOUND.ACCESSION);

      // each element is {coefficient, compound accession, reaction accession}
      List<String[]> reactants = new ArrayList<String[]>(10000);
      List<String[]> products = new ArrayList<String[]>(10000);

      KEGGReactionParser parser =
          new KEGGReactionParser(
              location.open(), KEGGField.ENTRY, KEGGField.EQUATION, KEGGField.ENZYME);
      Map<KEGGField, StringBuilder> entry;
      while ((entry = parser.readNext()) != null) {

        if (isCancelled()) {
          break;
        }

        StringBuilder equationField = entry.get(KEGGField.EQUATION);
        if (equationField == null) {
          // a record without an equation has no participants to store
          continue;
        }

        String ec =
            entry.containsKey(KEGGField.ENZYME)
                ? entry.get(KEGGField.ENZYME).toString().trim()
                : "";

        String[] sides = equationField.toString().split("<=>");
        if (sides.length < 2) {
          // one-sided or separator-less equation: skip rather than abort the whole update
          continue;
        }

        String[][] left = getParticipants(sides[0]);
        String[][] right = getParticipants(sides[1]);

        Matcher matcher = ACCESSION.matcher(entry.get(KEGGField.ENTRY).toString());

        // several EC numbers may be listed; only the first one is stored
        if (!ec.isEmpty()) {
          ec = ec.split("\\s+")[0].trim();
        }

        if (matcher.find()) {
          String accession = matcher.group(1);
          reactionInsert.values(accession, ec);

          for (String[] participant : left) {
            String cid = participant[1];
            if (compoundIds.add(cid)) {
              compoundInsert.values(cid);
            }
            // widen to three slots so the owning reaction accession travels with the participant
            String[] tagged = Arrays.copyOf(participant, 3);
            tagged[2] = accession;
            reactants.add(tagged);
          }
          for (String[] participant : right) {
            String cid = participant[1];
            if (compoundIds.add(cid)) {
              compoundInsert.values(cid);
            }
            String[] tagged = Arrays.copyOf(participant, 3);
            tagged[2] = accession;
            products.add(tagged);
          }
        }
      }

      // do the inserts
      fireProgressUpdate("inserting reactions and compounds");
      reactionInsert.execute();
      compoundInsert.execute();

      fireProgressUpdate("inserting reaction relations");

      for (String[] participant : reactants) {
        double coef = Double.parseDouble(participant[0]);
        String cid = participant[1];
        String acc = participant[2];
        create
            .insertInto(REACTANT)
            .set(REACTANT.COEFFICIENT, coef)
            .set(
                REACTANT.COMPOUND_ID,
                create.select(COMPOUND.ID).from(COMPOUND).where(COMPOUND.ACCESSION.eq(cid)))
            .set(
                REACTANT.REACTION_ID,
                create.select(REACTION.ID).from(REACTION).where(REACTION.ACCESSION.eq(acc)))
            .execute();
      }

      for (String[] participant : products) {
        double coef = Double.parseDouble(participant[0]);
        String cid = participant[1];
        String acc = participant[2];
        create
            .insertInto(PRODUCT)
            .set(PRODUCT.COEFFICIENT, coef)
            .set(
                PRODUCT.COMPOUND_ID,
                create.select(COMPOUND.ID).from(COMPOUND).where(COMPOUND.ACCESSION.eq(cid)))
            .set(
                PRODUCT.REACTION_ID,
                create.select(REACTION.ID).from(REACTION).where(REACTION.ACCESSION.eq(acc)))
            .execute();
      }

    } catch (SQLException e) {
      throw new IOException(e);
    } finally {
      location.close();
      try {
        connection.commit();
      } catch (SQLException e) {
        System.err.println(e.getMessage());
      } finally {
        try {
          connection.close();
        } catch (SQLException e) {
          // best effort: report the failure the same way as a failed commit, do not rethrow
          System.err.println(e.getMessage());
        }
      }
    }
  }