Esempio n. 1
0
  /**
   * Reports whether the read contains at least one base whose quality, decoded from the read's
   * base quality string with a fixed offset of {@code '!'} (phred+33), is below 20.
   *
   * @param read the read whose base quality string is inspected
   * @return {@code true} as soon as one quality character decodes to a value below 20;
   *     {@code false} if none does (including when the quality string is empty)
   */
  private boolean hasLowQualityBase(SAMRecord read) {
    // TODO: Don't hardcode phred33
    final char phredOffset = '!';
    final int minQuality = 20;

    // Fetch the quality string once instead of re-invoking the getter in the loop
    // condition and again in the loop body for every base.
    String qualities = read.getBaseQualityString();

    for (int i = 0; i < qualities.length(); i++) {
      if ((qualities.charAt(i) - phredOffset) < minQuality) {
        return true;
      }
    }

    return false;
  }
Esempio n. 2
0
  /**
   * Assembles contigs for the given regions from the reads of the given input files.
   *
   * <p>The method proceeds in this order:
   *
   * <ol>
   *   <li>If {@code kmers} is empty or its first entry is below {@code KmerSizeEvaluator.MIN_KMER},
   *       a minimum kmer is identified and {@code kmers} is replaced with
   *       {@code realigner.toKmerArray(kmer, readLength)}.
   *   <li>Reads are loaded via {@code getReads}; one list per entry of the returned outer list.
   *       For each list the assembly-trigger candidates are counted, and, when
   *       {@code shouldSearchForSv} is set, mate positions of SV candidate reads are collected in
   *       {@code svCandidates}.
   *   <li>If the region qualifies for assembly, the reads (downsampled when a list exceeds
   *       {@code desiredNumberOfReads(regions)}) are serialized into one string and
   *       {@code assemble} is invoked once per kmer until it returns something other than
   *       {@code "<REPEAT>"}.
   *   <li>When {@code shouldSearchForSv} is set, the sorted SV candidate positions are clustered
   *       and qualifying clusters are added to {@code svCandidateRegions}.
   * </ol>
   *
   * <p>Each serialized read consists of: one sample id character (starting at 1 and incremented
   * per read list), a strand flag ({@code '1'} for negative strand, {@code '0'} otherwise), the
   * read bases padded with {@code 'N'} up to {@code readLength}, and the base qualities padded
   * with {@code '!'} by the same number of characters.
   *
   * @param inputFiles input files handed to {@code getReads}
   * @param output base name for assembler output; {@code "_k" + kmer} is appended per attempt
   * @param tempDir not referenced by this method
   * @param regions regions to assemble; the first region is used for the candidate threshold and
   *     for the debug line
   * @param prefix passed through to {@code assemble}
   * @param checkForDupes not referenced by this method
   * @param realigner used for loading reads and for deriving the kmer array
   * @param c2r reference comparison helper; {@code null} marks the unaligned region, which is
   *     always assembled
   * @return the result of the last {@code assemble} call, or the empty string when assembly was
   *     skipped
   * @throws RuntimeException wrapping any exception raised while loading reads or assembling
   */
  public String assembleContigs(
      List<String> inputFiles,
      String output,
      String tempDir,
      List<Feature> regions,
      String prefix,
      boolean checkForDupes,
      ReAligner realigner,
      CompareToReference2 c2r) {

    if ((kmers.length == 0) || (kmers[0] < KmerSizeEvaluator.MIN_KMER)) {
      KmerSizeEvaluator kmerEval = new KmerSizeEvaluator();
      int kmer = kmerEval.identifyMinKmer(readLength, c2r, regions);
      this.kmers = realigner.toKmerArray(kmer, readLength);
    }

    String contigs = "";

    long start = System.currentTimeMillis();

    int readCount = 0;

    // Smallest per-list read count seen; stays at MAX_VALUE when no read list is returned.
    int minReadCount = Integer.MAX_VALUE;

    // if c2r is null, this is the unaligned region.
    boolean isAssemblyCandidate = c2r == null;

    try {

      List<List<SAMRecord>> readsList = getReads(inputFiles, regions, realigner);

      for (List<SAMRecord> reads : readsList) {
        int candidateReadCount = 0;
        for (SAMRecord read : reads) {
          // Once the region is known to be a candidate the trigger check is skipped.
          if (!isAssemblyCandidate && isAssemblyTriggerCandidate(read, c2r)) {
            candidateReadCount++;
          }

          if (shouldSearchForSv && isSvCandidate(read)) {
            svCandidates.add(
                new Position(read.getMateReferenceName(), read.getMateAlignmentStart()));
          }
        }

        if (candidateReadCount > minCandidateCount(reads.size(), regions.get(0))) {
          isAssemblyCandidate = true;
        }

        if (reads.size() < minReadCount) {
          minReadCount = reads.size();
        }

        readCount += reads.size();
      }

      // Local to this call, so the unsynchronized builder is sufficient.
      StringBuilder readBuffer = new StringBuilder();

      if (isAssemblyCandidate) {

        int downsampleTarget = desiredNumberOfReads(regions);

        char sampleId = 1;

        for (List<SAMRecord> reads : readsList) {
          // Default to always keep (nextDouble() is always below 1.0)
          double keepProbability = 1.1;

          if (reads.size() > downsampleTarget) {
            keepProbability = (double) downsampleTarget / (double) reads.size();
          }

          // Fixed seed: the same input always yields the same downsampled subset.
          Random random = new Random(1);

          for (SAMRecord read : reads) {
            if (random.nextDouble() < keepProbability) {
              readBuffer.append(sampleId);
              readBuffer.append(read.getReadNegativeStrandFlag() ? '1' : '0');

              String bases = read.getReadString();
              String qualities = read.getBaseQualityString();

              // Zero for full-length reads; not positive for reads longer than readLength,
              // in which case no padding is written.
              int padding = readLength - bases.length();

              readBuffer.append(bases);
              for (int i = 0; i < padding; i++) {
                readBuffer.append('N');
              }

              readBuffer.append(qualities);
              for (int i = 0; i < padding; i++) {
                readBuffer.append('!');
              }
            }
          }

          // Make this set of reads eligible for GC
          reads.clear();
          sampleId += 1;
        }
      }

      readsList.clear();

      if (isAssemblyCandidate) {
        // Materialize the serialized reads once rather than once per kmer attempt.
        String readData = readBuffer.toString();

        for (int kmer : kmers) {

          String outputFile = output + "_k" + kmer;

          contigs =
              assemble(
                  readData,
                  outputFile,
                  prefix,
                  truncateOnRepeat ? 1 : 0,
                  maxContigs,
                  maxPathsFromRoot,
                  readLength,
                  kmer,
                  minKmerFrequency,
                  minBaseQuality,
                  minEdgeRatio,
                  isDebug ? 1 : 0,
                  maxNodes);

          if (!contigs.equals("<REPEAT>")) {
            break;
          }

          // A repeat was reported for this kmer; remember it if the kmer was already large.
          if (kmer >= readLength / 2 || kmer >= CYCLE_KMER_LENGTH_THRESHOLD) {
            isCycleExceedingThresholdDetected = true;
          }
        }
      }

    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }

    if (this.shouldSearchForSv) {

      Collections.sort(this.svCandidates);
      Position last = null;
      String currentFeatureChr = null;
      int currentFeatureStart = -1;
      int currentFeatureStop = -1;
      int currentFeatureCount = 0;

      // TODO: Calc this dynamically
      int windowSize = 500;

      for (Position pos : this.svCandidates) {
        if ((last != null)
            && pos.getChromosome().equals(last.getChromosome())
            && Math.abs(pos.getPosition() - last.getPosition()) < windowSize) {

          // Same chromosome and within the window of the previous position: open or extend.
          if (currentFeatureChr == null) {
            // NOTE(review): the cluster is opened from two positions (last and pos) but the
            // count starts at 1, whereas a cluster opened in the branch below starts at 1 for a
            // single position -- confirm whether this asymmetry is intended.
            currentFeatureChr = pos.getChromosome();
            currentFeatureStart = last.getPosition();
            currentFeatureStop = pos.getPosition() + readLength;
            currentFeatureCount = 1;
          } else {
            currentFeatureStop = pos.getPosition() + readLength;
            currentFeatureCount++;
          }
        } else {
          if (currentFeatureChr != null) {
            // The open cluster ends here; keep it only if it has enough supporting positions.
            if (currentFeatureCount
                > (minReadCount / MAX_READ_LENGTHS_PER_REGION) * minReadCandidateFraction) {
              Feature region =
                  new Feature(
                      currentFeatureChr,
                      currentFeatureStart - readLength,
                      currentFeatureStop + readLength);
              BreakpointCandidate candidate = new BreakpointCandidate(region, currentFeatureCount);
              this.svCandidateRegions.add(candidate);
            }
            currentFeatureChr = null;
            currentFeatureStart = -1;
            currentFeatureStop = -1;
            currentFeatureCount = 0;
          } else {
            currentFeatureChr = pos.getChromosome();
            currentFeatureStart = pos.getPosition();
            currentFeatureStop = pos.getPosition() + readLength;
            currentFeatureCount = 1;
          }
        }
        last = pos;
      }

      // Don't forget last SV candidate region. The null check keeps a region without a
      // chromosome from being created when no cluster is open.
      if (currentFeatureChr != null
          && currentFeatureCount
              > (minReadCount / MAX_READ_LENGTHS_PER_REGION) * minReadCandidateFraction) {
        Feature region =
            new Feature(
                currentFeatureChr,
                currentFeatureStart - readLength,
                currentFeatureStop + readLength);
        BreakpointCandidate candidate = new BreakpointCandidate(region, currentFeatureCount);
        this.svCandidateRegions.add(candidate);
      }
    }

    long end = System.currentTimeMillis();

    if (isDebug) {
      // Reported kmer is the first configured one, or readLength + 1 when none is configured.
      int firstKmer = readLength + 1;
      if (kmers.length > 0) {
        firstKmer = kmers[0];
      }

      System.err.println(
          "Elapsed_msecs_in_NativeAssembler\tRegion:\t"
              + regions.get(0).getDescriptor()
              + "\tLength:\t"
              + regions.get(0).getLength()
              + "\tReadCount:\t"
              + readCount
              + "\tElapsed\t"
              + (end - start)
              + "\tAssembled\t"
              + isAssemblyCandidate
              + "\t"
              + firstKmer);
    }

    return contigs;
  }
Esempio n. 3
0
  /**
   * Assembles the given reads as a single sample, without downsampling or candidate checks.
   *
   * <p>Each read is serialized as: the sample id character {@code (char) 1}, a strand flag
   * ({@code '1'} for negative strand, {@code '0'} otherwise), the read bases padded with
   * {@code 'N'} up to {@code readLength}, and the base qualities padded with {@code '!'} by the
   * same number of characters. {@code assemble} is then invoked once per configured kmer, with
   * truncate-on-repeat enabled, until it returns something other than {@code "<REPEAT>"}.
   *
   * <p>The output name is {@code "region_<reference>_<start>_<end>"}, where the reference and
   * start come from the first read and the end from the last read (its alignment end when
   * positive, otherwise its alignment start).
   * NOTE(review): this presumably expects {@code reads} to be coordinate sorted -- confirm
   * against callers.
   *
   * @param reads the reads to assemble; the list is not modified
   * @return the result of the last {@code assemble} call, or the empty string when {@code reads}
   *     is empty or no kmer is configured
   */
  public String simpleAssemble(List<SAMRecord> reads) {

    // Nothing to assemble; previously this failed on reads.get(reads.size() - 1).
    if (reads.isEmpty()) {
      return "";
    }

    // Local to this call, so the unsynchronized builder is sufficient.
    StringBuilder readBuffer = new StringBuilder();

    for (SAMRecord read : reads) {
      readBuffer.append((char) 1);
      readBuffer.append(read.getReadNegativeStrandFlag() ? '1' : '0');

      String bases = read.getReadString();
      String qualities = read.getBaseQualityString();

      // Zero for full-length reads; not positive for reads longer than readLength,
      // in which case no padding is written.
      int padding = readLength - bases.length();

      readBuffer.append(bases);
      for (int i = 0; i < padding; i++) {
        readBuffer.append('N');
      }

      readBuffer.append(qualities);
      for (int i = 0; i < padding; i++) {
        readBuffer.append('!');
      }
    }

    SAMRecord firstRead = reads.get(0);
    SAMRecord lastRead = reads.get(reads.size() - 1);
    int regionStart = firstRead.getAlignmentStart();
    int regionEnd =
        lastRead.getAlignmentEnd() > 0 ? lastRead.getAlignmentEnd() : lastRead.getAlignmentStart();

    String output = "region_" + firstRead.getReferenceName() + "_" + regionStart + "_" + regionEnd;
    String contigs = "";

    // Materialize the serialized reads once rather than once per kmer attempt.
    String readData = readBuffer.toString();

    for (int kmer : kmers) {

      String outputFile = output + "_k" + kmer;

      contigs =
          assemble(
              readData,
              outputFile,
              output,
              1, // truncate_on_repeat
              maxContigs,
              maxPathsFromRoot,
              readLength,
              kmer,
              minKmerFrequency,
              minBaseQuality,
              minEdgeRatio,
              isDebug ? 1 : 0,
              maxNodes);

      if (!contigs.equals("<REPEAT>")) {
        break;
      }
    }

    return contigs;
  }