/**
 * Prints, in FASTA format, every supernode that contains at least one k-mer from the reference
 * panel.
 *
 * <p>The panel is read from the {@code "kmer"} column of {@code KMER_REFERENCE_PANEL}. The k-mer
 * size used for scanning is the {@code length()} of the first panel k-mer that reports a non-zero
 * length. Each sequence from {@code SUPERNODES} is written to {@code out} at most once, as soon
 * as its first matching k-mer is found.
 */
@Override
  public void execute() {
    TableReader tr = new TableReader(KMER_REFERENCE_PANEL);
    Set<CortexKmer> kmers = new HashSet<CortexKmer>();

    int kmerSize = 0;
    for (Map<String, String> te : tr) {
      CortexKmer kmer = new CortexKmer(te.get("kmer"));

      kmers.add(kmer);

      if (kmerSize == 0) {
        kmerSize = kmer.length();
      }
    }

    // An empty panel can never match anything. It would also leave kmerSize at 0, which makes
    // the scan below build a zero-length k-mer at every position of every supernode.
    if (kmers.isEmpty()) {
      return;
    }

    ReferenceSequence rseq;
    while ((rseq = SUPERNODES.nextSequence()) != null) {
      String seq = new String(rseq.getBases());

      // Slide a window of kmerSize bases across the supernode.
      for (int i = 0; i <= seq.length() - kmerSize; i++) {
        CortexKmer kmer = new CortexKmer(seq.substring(i, i + kmerSize));

        if (kmers.contains(kmer)) {
          out.println(">" + rseq.getName() + "\n" + seq);
          // One hit is enough; do not emit the same supernode twice.
          break;
        }
      }
    }
  }
Beispiel #2
0
  /**
   * Cuts every sequence from {@code REF} into consecutive, non-overlapping slices of
   * {@code LENGTH} characters and writes each slice to {@code out} as a two-line FASTA record
   * whose header is the sequence name, a dot, and the slice's start offset.
   *
   * <p>Only full-length slices are written; a trailing remainder shorter than {@code LENGTH} is
   * dropped.
   *
   * @throws IllegalArgumentException if {@code LENGTH} is not positive
   */
  @Override
  public void execute() {
    if (LENGTH <= 0) {
      // A zero step would never advance the loop below; a negative one makes substring() fail.
      throw new IllegalArgumentException("LENGTH must be positive, but was " + LENGTH);
    }

    ReferenceSequence rseq;
    while ((rseq = REF.nextSequence()) != null) {
      String seq = new String(rseq.getBases());

      // '<=' keeps the final slice when it ends exactly at the end of the sequence; a strict
      // '<' would silently drop it whenever the sequence length is a multiple of LENGTH.
      for (int i = 0; i + LENGTH <= seq.length(); i += LENGTH) {
        String slice = seq.substring(i, i + LENGTH);

        out.println(">" + rseq.getName() + "." + i);
        out.println(slice);
      }
    }
  }
Beispiel #3
0
  /**
   * Populates {@code contigs} (contig name to sequence) and, when needed, {@code metrics}.
   *
   * <p>If {@code contigs} is null or empty, both maps are rebuilt from the {@code METRICS} table:
   * each row is stored in {@code metrics} under its {@code "contigName"} value, and its
   * {@code "seq"} value is stored in {@code contigs} under the same key.
   *
   * <p>On every call, all records of the {@code CONTIGS} FASTA file are then added to
   * {@code contigs}, keyed by the first whitespace-delimited token of the record name. A FASTA
   * record therefore replaces any sequence the table supplied for the same name.
   */
  private void loadContigs() {
    if (contigs == null || contigs.size() == 0) {
      contigs = new HashMap<String, String>();

      TableReader tr = new TableReader(METRICS);
      metrics = new HashMap<String, Map<String, String>>();

      for (Map<String, String> te : tr) {
        String contigName = te.get("contigName");
        String seq = te.get("seq");

        contigs.put(contigName, seq);

        metrics.put(contigName, te);
      }
    }

    FastaSequenceFile fasta = new FastaSequenceFile(CONTIGS, false);
    try {
      ReferenceSequence rseq;
      while ((rseq = fasta.nextSequence()) != null) {
        // Keep only the identifier part of the FASTA header (text before the first whitespace).
        String[] name = rseq.getName().split("\\s+");
        contigs.put(name[0], new String(rseq.getBases()));
      }
    } finally {
      // Release the underlying file handle even if reading fails part-way through.
      fasta.close();
    }
  }
Beispiel #4
0
    /**
     * Opens {@code file} with a reader chosen from its file-name suffix.
     *
     * <ul>
     *   <li>{@code .bam}: a {@code SAMFileReader} with validation stringency set to
     *       {@code SILENT} is stored in {@code sfr}.
     *   <li>{@code .fastq}, {@code .fq}, {@code .fastq.gz}, {@code .fq.gz}: a {@code FastqReader}
     *       is stored in {@code fqr}.
     *   <li>{@code .fasta}, {@code .fa}: a {@code FastaSequenceFile} is stored in {@code fsr} and
     *       read to the end immediately, collecting each record's bases as a string in the
     *       insertion-ordered set {@code fss} (identical sequences are kept once).
     * </ul>
     *
     * @param file the sequence file to open; the suffix match is case-sensitive
     * @throws IndianaException if the file name has none of the suffixes listed above
     */
    public GeneralizedSequenceReader(File file) {
      final String fileName = file.getName();

      final boolean isBam = fileName.endsWith(".bam");
      final boolean isFastq =
          fileName.endsWith(".fastq")
              || fileName.endsWith(".fq")
              || fileName.endsWith(".fastq.gz")
              || fileName.endsWith(".fq.gz");
      final boolean isFasta = fileName.endsWith(".fasta") || fileName.endsWith(".fa");

      if (isBam) {
        sfr = new SAMFileReader(file);
        sfr.setValidationStringency(ValidationStringency.SILENT);
        return;
      }

      if (isFastq) {
        fqr = new FastqReader(file, true);
        return;
      }

      if (!isFasta) {
        throw new IndianaException(
            "Cannot parse '" + file.getAbsolutePath() + "' with generalized reader");
      }

      // FASTA input: drain the whole file up front into the ordered set.
      fsr = new FastaSequenceFile(file, false);
      fss = new LinkedHashSet<String>();

      for (ReferenceSequence record = fsr.nextSequence();
          record != null;
          record = fsr.nextSequence()) {
        fss.add(new String(record.getBases()));
      }
    }