/**
 * Populates the accession lookup maps from the "ChEBI Compounds" resource.
 *
 * <p>The resource is read as a tab-separated table whose header row must contain the
 * columns {@code CHEBI_ACCESSION}, {@code PARENT_ID} and {@code STATUS}. For every row whose
 * accession matches {@code (?:C[hH]EBI:)?(\d+)}:
 *
 * <ul>
 *   <li>{@code parentMap} maps the row's accession to its parent accession;</li>
 *   <li>{@code childMap} maps the parent accession to the row's accession (a later row
 *       for the same parent overwrites an earlier one);</li>
 *   <li>{@code statusMap} maps the row's accession to the first character of the
 *       {@code STATUS} column.</li>
 * </ul>
 *
 * Each key is stored twice, once as the bare number and once with the {@code "CHEBI:"}
 * prefix; mapped accessions are always stored with the prefix. When the parent column does
 * not contain an accession (for example the text {@code null}), the row's own accession is
 * used as its parent.
 *
 * @throws IOException problem reading the file, or the header row is absent or lacks one
 *     of the required columns
 * @throws MissingLocationException if ChEBI Compounds resource location is missing
 */
public void createMap() throws IOException, MissingLocationException {
    ResourceFileLocation location = getLocation("ChEBI Compounds");
    CSVReader csv = null;
    try {
        csv = new CSVReader(new InputStreamReader(location.open()), '\t');

        String[] headerRow = csv.readNext();
        if (headerRow == null) {
            throw new IOException("ChEBI Compounds resource is empty: no header row");
        }
        List<String> header = Arrays.asList(headerRow);
        int accessionIndex = header.indexOf("CHEBI_ACCESSION");
        int parentIndex = header.indexOf("PARENT_ID");
        int statusIndex = header.indexOf("STATUS");
        // Fail with a descriptive error here rather than an index -1 lookup on the first row.
        if (accessionIndex < 0 || parentIndex < 0 || statusIndex < 0) {
            throw new IOException(
                "ChEBI Compounds header must contain CHEBI_ACCESSION, PARENT_ID and STATUS"
                    + " but was: " + header);
        }

        // Optional "CHEBI:"/"ChEBI:" prefix followed by the numeric identifier (group 1).
        Pattern accessionPattern = Pattern.compile("(?:C[hH]EBI:)?(\\d+)");

        String[] row;
        while ((row = csv.readNext()) != null) {
            String accession = row[accessionIndex];
            String parent = row[parentIndex];
            Character status = row[statusIndex].charAt(0);

            Matcher accessionMatcher = accessionPattern.matcher(accession);
            Matcher parentMatcher = accessionPattern.matcher(parent);

            if (accessionMatcher.find()) {
                String childAcc = accessionMatcher.group(1);
                // No recognisable parent accession: the entry is treated as its own parent.
                String parentAcc = parentMatcher.find() ? parentMatcher.group(1) : childAcc;

                childMap.put(parentAcc, "CHEBI:" + childAcc);
                childMap.put("CHEBI:" + parentAcc, "CHEBI:" + childAcc);

                parentMap.put(childAcc, "CHEBI:" + parentAcc);
                parentMap.put("CHEBI:" + childAcc, "CHEBI:" + parentAcc);

                statusMap.put(childAcc, status);
                statusMap.put("CHEBI:" + childAcc, status);
            }
        }
    } finally {
        // Release both resources even when reading fails; the nested finally ensures the
        // reader is still closed if closing the location throws.
        try {
            location.close();
        } finally {
            if (csv != null) {
                csv.close();
            }
        }
    }
}
@Override public void update() throws IOException { ResourceFileLocation location = getLocation("KEGG Reaction"); HSQLDBLocation connection = connection(); try { Hsqldb.createReactionSchema(connection.getConnection()); DSLContext create = DSL.using(connection.getConnection(), HSQLDB); Set<String> compoundIds = Sets.newHashSetWithExpectedSize(10000); InsertValuesStep2<?, String, String> reactionInsert = create.insertInto(REACTION, REACTION.ACCESSION, REACTION.EC); InsertValuesStep1<?, String> compoundInsert = create.insertInto(COMPOUND, COMPOUND.ACCESSION); List<String[]> reactants = new ArrayList<String[]>(10000); List<String[]> products = new ArrayList<String[]>(10000); KEGGReactionParser parser = new KEGGReactionParser( location.open(), KEGGField.ENTRY, KEGGField.EQUATION, KEGGField.ENZYME); Map<KEGGField, StringBuilder> entry; while ((entry = parser.readNext()) != null) { if (isCancelled()) break; String equation = entry.get(KEGGField.EQUATION).toString(); String ec = entry.containsKey(KEGGField.ENZYME) ? 
entry.get(KEGGField.ENZYME).toString().trim() : ""; String[] sides = equation.split("<=>"); String[][] left = getParticipants(sides[0]); String[][] right = getParticipants(sides[1]); Matcher matcher = ACCESSION.matcher(entry.get(KEGGField.ENTRY).toString()); if (!ec.isEmpty()) ec = ec.split("\\s+")[0].trim(); if (matcher.find()) { String accession = matcher.group(1); reactionInsert.values(accession, ec); for (String[] participant : left) { String cid = participant[1]; if (compoundIds.add(cid)) compoundInsert.values(cid); participant = Arrays.copyOf(participant, 3); participant[2] = accession; reactants.add(participant); } for (String[] participant : right) { String cid = participant[1]; if (compoundIds.add(cid)) compoundInsert.values(cid); participant = Arrays.copyOf(participant, 3); participant[2] = accession; products.add(participant); } } } // do the inserts fireProgressUpdate("inserting reactions and compounds"); reactionInsert.execute(); compoundInsert.execute(); fireProgressUpdate("inserting reaction relations"); for (int i = 0, end = reactants.size() - 1; i <= end; i++) { String[] participant = reactants.get(i); double coef = Double.parseDouble(participant[0]); String cid = participant[1]; String acc = participant[2]; create .insertInto(REACTANT) .set(REACTANT.COEFFICIENT, coef) .set( REACTANT.COMPOUND_ID, create.select(COMPOUND.ID).from(COMPOUND).where(COMPOUND.ACCESSION.eq(cid))) .set( REACTANT.REACTION_ID, create.select(REACTION.ID).from(REACTION).where(REACTION.ACCESSION.eq(acc))) .execute(); } for (int i = 0, end = products.size() - 1; i <= end; i++) { String[] participant = products.get(i); double coef = Double.parseDouble(participant[0]); String cid = participant[1]; String acc = participant[2]; create .insertInto(PRODUCT) .set(PRODUCT.COEFFICIENT, coef) .set( PRODUCT.COMPOUND_ID, create.select(COMPOUND.ID).from(COMPOUND).where(COMPOUND.ACCESSION.eq(cid))) .set( PRODUCT.REACTION_ID, 
create.select(REACTION.ID).from(REACTION).where(REACTION.ACCESSION.eq(acc))) .execute(); } } catch (SQLException e) { throw new IOException(e); } finally { location.close(); try { connection.commit(); } catch (SQLException e) { System.err.println(e.getMessage()); } finally { try { connection.close(); } catch (SQLException e) { } } } }