/**
 * Loads the given BLAST XML file into {@code blastDoc}.
 *
 * <p>XML-level failures are logged and converted to {@link ParseException} so callers only have
 * to deal with the exception types declared here. The original exception is attached as the
 * cause so its stack trace is not lost.
 *
 * @param blastFile path of the BLAST XML file to load
 * @throws IOException propagated unchanged from {@code XMLHelper.loadXML}
 * @throws ParseException if the XML is malformed or the XML parser is misconfigured; the error
 *     offset is always reported as 0 because the underlying exceptions carry no usable offset here
 */
private void readFile(String blastFile) throws IOException, ParseException {
  logger.info("Start reading {}", blastFile);
  try {
    blastDoc = XMLHelper.loadXML(blastFile);
  } catch (SAXException ex) {
    logger.error("A parsing error has occurred while reading XML blast file");
    // ParseException has no (message, cause) constructor, so chain the cause explicitly.
    ParseException parseException = new ParseException(ex.getMessage(), 0);
    parseException.initCause(ex);
    throw parseException;
  } catch (ParserConfigurationException ex) {
    logger.error("Internal XML parser non properly configured");
    ParseException parseException = new ParseException(ex.getMessage(), 0);
    parseException.initCause(ex);
    throw parseException;
  }
  logger.info("Read finished");
}
/**
 * Collects the coding DNA sequence of every predicted gene in the loaded GeneID document.
 *
 * <p>Each {@code prediction/gene/cDNA} element contributes one entry. Its text content has every
 * non-word character (anything matching the regex {@code \W}) removed before the sequence is
 * built. The entry is keyed by the {@code idGene} attribute of the element's parent, and that
 * same value is used as the sequence accession. A repeated {@code idGene} replaces the sequence
 * stored earlier for that key.
 *
 * @return sequences keyed by gene id, in the order the elements were selected
 * @throws Exception if element selection or sequence construction fails
 */
public LinkedHashMap<String, DNASequence> getDNACodingSequences() throws Exception {
  final Element root = geneidDoc.getDocumentElement();
  final ArrayList<Element> cdnaNodes = XMLHelper.selectElements(root, "prediction/gene/cDNA");
  logger.info("{} hits", cdnaNodes.size());

  final LinkedHashMap<String, DNASequence> codingByGeneId =
      new LinkedHashMap<String, DNASequence>();
  for (Element cdnaNode : cdnaNodes) {
    // The gene id lives on the enclosing <gene> element, not on the cDNA node itself.
    final Element geneNode = (Element) cdnaNode.getParentNode();

    final String rawText = cdnaNode.getTextContent();
    final DNASequence coding = new DNASequence(rawText.replaceAll("\\W", ""));

    final String geneId = geneNode.getAttribute("idGene");
    coding.setAccession(new AccessionID(geneId));
    codingByGeneId.put(geneId, coding);
  }
  return codingByGeneId;
}
/**
 * Collects the protein sequence of every predicted gene in the loaded GeneID document.
 *
 * <p>Each {@code prediction/gene/protein} element contributes one entry. Its text content has
 * every non-word character (anything matching the regex {@code \W}) removed before the sequence
 * is built. The entry is keyed by the {@code idGene} attribute of the element's parent, and that
 * same value is used as the sequence accession. A repeated {@code idGene} replaces the sequence
 * stored earlier for that key.
 *
 * @return sequences keyed by gene id, in the order the elements were selected
 * @throws Exception if element selection or sequence construction fails
 */
public LinkedHashMap<String, ProteinSequence> getProteinSequences() throws Exception {
  final Element root = geneidDoc.getDocumentElement();
  final ArrayList<Element> proteinNodes =
      XMLHelper.selectElements(root, "prediction/gene/protein");
  logger.info("{} hits", proteinNodes.size());

  final LinkedHashMap<String, ProteinSequence> proteinByGeneId =
      new LinkedHashMap<String, ProteinSequence>();
  for (Element proteinNode : proteinNodes) {
    // The gene id lives on the enclosing <gene> element, not on the protein node itself.
    final Element geneNode = (Element) proteinNode.getParentNode();

    final String rawText = proteinNode.getTextContent();
    final ProteinSequence protein = new ProteinSequence(rawText.replaceAll("\\W", ""));

    final String geneId = geneNode.getAttribute("idGene");
    protein.setAccession(new AccessionID(geneId));
    proteinByGeneId.put(geneId, protein);
  }
  return proteinByGeneId;
}
public List<Result> createObjects(double maxEScore) throws IOException, ParseException { if (targetFile == null) throw new IllegalStateException("File to be parsed not specified."); // getAbsolutePath throws SecurityException readFile(targetFile.getAbsolutePath()); // create mappings between sequences and blast id mapIds(); ArrayList<Result> resultsCollection; ArrayList<Hit> hitsCollection; ArrayList<Hsp> hspsCollection; try { // select top level elements String program = XMLHelper.selectSingleElement(blastDoc.getDocumentElement(), "BlastOutput_program") .getTextContent(); String version = XMLHelper.selectSingleElement(blastDoc.getDocumentElement(), "BlastOutput_version") .getTextContent(); String reference = XMLHelper.selectSingleElement(blastDoc.getDocumentElement(), "BlastOutput_reference") .getTextContent(); String dbFile = XMLHelper.selectSingleElement(blastDoc.getDocumentElement(), "BlastOutput_db") .getTextContent(); logger.info("Query for hits in " + targetFile); ArrayList<Element> IterationsList = XMLHelper.selectElements( blastDoc.getDocumentElement(), "BlastOutput_iterations/Iteration[Iteration_hits]"); logger.info(IterationsList.size() + " results"); resultsCollection = new ArrayList<Result>(); for (Element element : IterationsList) { BlastResultBuilder resultBuilder = new BlastResultBuilder(); // will add BlastOutput* key sections in the result object resultBuilder .setProgram(program) .setVersion(version) .setReference(reference) .setDbFile(dbFile); // Iteration* section keys: resultBuilder .setIterationNumber( new Integer( XMLHelper.selectSingleElement(element, "Iteration_iter-num").getTextContent())) .setQueryID( XMLHelper.selectSingleElement(element, "Iteration_query-ID").getTextContent()) .setQueryDef( XMLHelper.selectSingleElement(element, "Iteration_query-def").getTextContent()) .setQueryLength( new Integer( XMLHelper.selectSingleElement(element, "Iteration_query-len") .getTextContent())); if (queryReferences != null) 
resultBuilder.setQuerySequence( queryReferencesMap.get( XMLHelper.selectSingleElement(element, "Iteration_query-ID").getTextContent())); Element iterationHitsElement = XMLHelper.selectSingleElement(element, "Iteration_hits"); ArrayList<Element> hitList = XMLHelper.selectElements(iterationHitsElement, "Hit"); hitsCollection = new ArrayList<Hit>(); for (Element hitElement : hitList) { BlastHitBuilder blastHitBuilder = new BlastHitBuilder(); blastHitBuilder .setHitNum( new Integer( XMLHelper.selectSingleElement(hitElement, "Hit_num").getTextContent())) .setHitId(XMLHelper.selectSingleElement(hitElement, "Hit_id").getTextContent()) .setHitDef(XMLHelper.selectSingleElement(hitElement, "Hit_def").getTextContent()) .setHitAccession( XMLHelper.selectSingleElement(hitElement, "Hit_accession").getTextContent()) .setHitLen( new Integer( XMLHelper.selectSingleElement(hitElement, "Hit_len").getTextContent())); if (databaseReferences != null) blastHitBuilder.setHitSequence( databaseReferencesMap.get( XMLHelper.selectSingleElement(hitElement, "Hit_id").getTextContent())); Element hithspsElement = XMLHelper.selectSingleElement(hitElement, "Hit_hsps"); ArrayList<Element> hspList = XMLHelper.selectElements(hithspsElement, "Hsp"); hspsCollection = new ArrayList<Hsp>(); for (Element hspElement : hspList) { Double evalue = new Double( XMLHelper.selectSingleElement(hspElement, "Hsp_evalue").getTextContent()); // add the new hsp only if it pass the specified threshold. 
It can save lot of memory // and some parsing time if (evalue <= maxEScore) { BlastHspBuilder blastHspBuilder = new BlastHspBuilder(); blastHspBuilder .setHspNum( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_num").getTextContent())) .setHspBitScore( new Double( XMLHelper.selectSingleElement(hspElement, "Hsp_bit-score") .getTextContent())) .setHspScore( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_score").getTextContent())) .setHspEvalue(evalue) .setHspQueryFrom( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_query-from") .getTextContent())) .setHspQueryTo( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_query-to") .getTextContent())) .setHspHitFrom( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_hit-from") .getTextContent())) .setHspHitTo( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_hit-to").getTextContent())) .setHspQueryFrame( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_query-frame") .getTextContent())) .setHspHitFrame( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_hit-frame") .getTextContent())) .setHspIdentity( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_identity") .getTextContent())) .setHspPositive( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_positive") .getTextContent())) .setHspGaps( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_gaps").getTextContent())) .setHspAlignLen( new Integer( XMLHelper.selectSingleElement(hspElement, "Hsp_align-len") .getTextContent())) .setHspQseq( XMLHelper.selectSingleElement(hspElement, "Hsp_qseq").getTextContent()) .setHspHseq( XMLHelper.selectSingleElement(hspElement, "Hsp_hseq").getTextContent()) .setHspIdentityString( XMLHelper.selectSingleElement(hspElement, "Hsp_midline").getTextContent()); hspsCollection.add(blastHspBuilder.createBlastHsp()); } } // finally set the computed hsp collection and create Hit object blastHitBuilder.setHsps(hspsCollection); 
hitsCollection.add(blastHitBuilder.createBlastHit()); } // finally set the computed Hit collection to the result resultBuilder.setHits(hitsCollection); resultsCollection.add(resultBuilder.createBlastResult()); } } catch (XPathException e) { throw new ParseException(e.getMessage(), 0); } logger.info("Parsing of " + targetFile + " finished."); return resultsCollection; }
/**
 * Creates a reader by loading the given GeneID XML file.
 *
 * <p>The document is loaded here, in the constructor, and stored in {@code geneidDoc}. Start and
 * end of the load are logged at info level.
 *
 * @param geneidXMLFile path of the GeneID XML file to load
 * @throws Exception if {@code XMLHelper.loadXML} fails
 */
public GeneIDXMLReader(String geneidXMLFile) throws Exception {
  logger.info("Start read of {}", geneidXMLFile);

  this.geneidDoc = XMLHelper.loadXML(geneidXMLFile);

  logger.info("Read finished");
}