Esempio n. 1
0
  /**
   * Parses the BLAST XML output in {@code targetFile} and builds one {@link Result} per
   * {@code Iteration} element that contains an {@code Iteration_hits} child.
   *
   * <p>The file is first loaded with {@code readFile} and the sequence/id mappings are prepared
   * with {@code mapIds}. Every result carries the top-level {@code BlastOutput_*} values, its own
   * {@code Iteration_*} values and the list of its hits. HSPs whose e-value is greater than
   * {@code maxEScore} are dropped while parsing; a hit is still reported even when none of its
   * HSPs pass the threshold.
   *
   * @param maxEScore inclusive upper bound on the e-value of the HSPs to keep
   * @return the parsed results, in document order; empty if no iteration matched
   * @throws IOException declared for the file-loading step ({@code readFile})
   * @throws ParseException if an XPath query fails (the XPath failure is attached as the cause) or
   *     if a required element is missing from the document
   * @throws IllegalStateException if no file to parse has been set
   * @throws NumberFormatException if a numeric element does not contain a parsable number
   */
  public List<Result> createObjects(double maxEScore) throws IOException, ParseException {
    if (targetFile == null) {
      throw new IllegalStateException("File to be parsed not specified.");
    }

    // getAbsolutePath throws SecurityException
    readFile(targetFile.getAbsolutePath());
    // create mappings between sequences and blast id
    mapIds();

    List<Result> resultsCollection = new ArrayList<Result>();

    try {
      Element root = blastDoc.getDocumentElement();

      // top level values, shared by every result of this file
      String program = childText(root, "BlastOutput_program");
      String version = childText(root, "BlastOutput_version");
      String reference = childText(root, "BlastOutput_reference");
      String dbFile = childText(root, "BlastOutput_db");

      logger.info("Query for hits in " + targetFile);
      List<Element> iterations =
          XMLHelper.selectElements(root, "BlastOutput_iterations/Iteration[Iteration_hits]");
      logger.info(iterations.size() + " results");

      for (Element iteration : iterations) {
        Integer iterationNumber = childInteger(iteration, "Iteration_iter-num");
        // read once: used both as the query id and as the key of the query sequence lookup
        String queryId = childText(iteration, "Iteration_query-ID");
        String queryDef = childText(iteration, "Iteration_query-def");
        Integer queryLength = childInteger(iteration, "Iteration_query-len");

        BlastResultBuilder resultBuilder = new BlastResultBuilder();
        // BlastOutput* key sections
        resultBuilder
            .setProgram(program)
            .setVersion(version)
            .setReference(reference)
            .setDbFile(dbFile);
        // Iteration* key sections
        resultBuilder
            .setIterationNumber(iterationNumber)
            .setQueryID(queryId)
            .setQueryDef(queryDef)
            .setQueryLength(queryLength);

        if (queryReferences != null) {
          resultBuilder.setQuerySequence(queryReferencesMap.get(queryId));
        }

        // finally set the computed Hit collection to the result
        resultBuilder.setHits(parseHits(iteration, maxEScore));
        resultsCollection.add(resultBuilder.createBlastResult());
      }
    } catch (XPathException e) {
      // ParseException has no (message, cause) constructor: attach the cause explicitly so the
      // original XPath failure and its stack trace are not lost
      ParseException parseException = new ParseException(e.getMessage(), 0);
      parseException.initCause(e);
      throw parseException;
    }
    logger.info("Parsing of " + targetFile + " finished.");

    return resultsCollection;
  }

  /** Builds the hits listed under the {@code Iteration_hits} child of one iteration element. */
  private ArrayList<Hit> parseHits(Element iteration, double maxEScore)
      throws XPathException, ParseException {
    Element iterationHits = requireChild(iteration, "Iteration_hits");
    List<Element> hitElements = XMLHelper.selectElements(iterationHits, "Hit");

    ArrayList<Hit> hits = new ArrayList<Hit>();
    for (Element hitElement : hitElements) {
      Integer hitNum = childInteger(hitElement, "Hit_num");
      // read once: used both as the hit id and as the key of the hit sequence lookup
      String hitId = childText(hitElement, "Hit_id");
      String hitDef = childText(hitElement, "Hit_def");
      String hitAccession = childText(hitElement, "Hit_accession");
      Integer hitLen = childInteger(hitElement, "Hit_len");

      BlastHitBuilder hitBuilder = new BlastHitBuilder();
      hitBuilder
          .setHitNum(hitNum)
          .setHitId(hitId)
          .setHitDef(hitDef)
          .setHitAccession(hitAccession)
          .setHitLen(hitLen);

      if (databaseReferences != null) {
        hitBuilder.setHitSequence(databaseReferencesMap.get(hitId));
      }

      // finally set the computed hsp collection and create Hit object
      hitBuilder.setHsps(parseHsps(hitElement, maxEScore));
      hits.add(hitBuilder.createBlastHit());
    }
    return hits;
  }

  /**
   * Builds the HSPs listed under the {@code Hit_hsps} child of one hit element, keeping only those
   * whose e-value is not greater than {@code maxEScore}.
   */
  private ArrayList<Hsp> parseHsps(Element hitElement, double maxEScore)
      throws XPathException, ParseException {
    Element hitHsps = requireChild(hitElement, "Hit_hsps");
    List<Element> hspElements = XMLHelper.selectElements(hitHsps, "Hsp");

    ArrayList<Hsp> hsps = new ArrayList<Hsp>();
    for (Element hspElement : hspElements) {
      Double evalue = childDouble(hspElement, "Hsp_evalue");

      // the e-value is read first so that HSPs over the threshold are skipped before any other
      // field is parsed: it can save a lot of memory and some parsing time
      if (evalue > maxEScore) {
        continue;
      }

      BlastHspBuilder hspBuilder = new BlastHspBuilder();
      hspBuilder
          .setHspNum(childInteger(hspElement, "Hsp_num"))
          .setHspBitScore(childDouble(hspElement, "Hsp_bit-score"))
          .setHspScore(childInteger(hspElement, "Hsp_score"))
          .setHspEvalue(evalue)
          .setHspQueryFrom(childInteger(hspElement, "Hsp_query-from"))
          .setHspQueryTo(childInteger(hspElement, "Hsp_query-to"))
          .setHspHitFrom(childInteger(hspElement, "Hsp_hit-from"))
          .setHspHitTo(childInteger(hspElement, "Hsp_hit-to"))
          .setHspQueryFrame(childInteger(hspElement, "Hsp_query-frame"))
          .setHspHitFrame(childInteger(hspElement, "Hsp_hit-frame"))
          .setHspIdentity(childInteger(hspElement, "Hsp_identity"))
          .setHspPositive(childInteger(hspElement, "Hsp_positive"))
          .setHspGaps(childInteger(hspElement, "Hsp_gaps"))
          .setHspAlignLen(childInteger(hspElement, "Hsp_align-len"))
          .setHspQseq(childText(hspElement, "Hsp_qseq"))
          .setHspHseq(childText(hspElement, "Hsp_hseq"))
          .setHspIdentityString(childText(hspElement, "Hsp_midline"));

      hsps.add(hspBuilder.createBlastHsp());
    }
    return hsps;
  }

  /**
   * Selects the single element matching {@code name} under {@code parent}, failing with a
   * descriptive {@link ParseException} instead of a later NullPointerException when the lookup
   * returns {@code null}.
   */
  private static Element requireChild(Element parent, String name)
      throws XPathException, ParseException {
    Element child = XMLHelper.selectSingleElement(parent, name);
    if (child == null) {
      throw new ParseException(
          "Missing <" + name + "> element in <" + parent.getTagName() + ">", 0);
    }
    return child;
  }

  /** Returns the text content of the required child element {@code name} of {@code parent}. */
  private static String childText(Element parent, String name)
      throws XPathException, ParseException {
    return requireChild(parent, name).getTextContent();
  }

  /** Returns the text content of the required child element {@code name} parsed as an Integer. */
  private static Integer childInteger(Element parent, String name)
      throws XPathException, ParseException {
    return Integer.valueOf(childText(parent, name));
  }

  /** Returns the text content of the required child element {@code name} parsed as a Double. */
  private static Double childDouble(Element parent, String name)
      throws XPathException, ParseException {
    return Double.valueOf(childText(parent, name));
  }