/**
 * Drops discarded reads from one buffer, or discarded pairs from two parallel buffers.
 *
 * <p>When {@code buffer2} is null only {@code buffer1} is examined. Otherwise index {@code i} of
 * both buffers is treated as a pair and the pair is dropped if either member is discarded.
 * Dropped slots are first nulled out and then squeezed out with {@code Tools.condenseStrict}.
 *
 * @param buffer1 primary list of reads
 * @param buffer2 parallel list of mates, or null
 * @return the number of positions removed
 */
private final int removeDiscarded(ArrayList<Read> buffer1, ArrayList<Read> buffer2) {
    final boolean paired = (buffer2 != null);
    int dropped = 0;

    for (int idx = 0; idx < buffer1.size(); idx++) {
      final Read first = buffer1.get(idx);
      final Read second = (paired ? buffer2.get(idx) : null);

      final boolean drop = first.discarded() || (paired && second.discarded());
      if (!drop) {
        continue;
      }

      buffer1.set(idx, null);
      if (paired) {
        buffer2.set(idx, null);
      }
      dropped++;
    }

    // Nothing was nulled, so there is nothing to compact.
    if (dropped == 0) {
      return 0;
    }

    Tools.condenseStrict(buffer1);
    if (paired) {
      Tools.condenseStrict(buffer2);
    }
    return dropped;
  }
  @Override
  boolean processReadPair(Read r1, Read r2) {
    assert (r2 == null);
    final byte[] quals = r1.quality, bases = r1.bases;
    final byte[] match =
        (r1.match == null ? null : !r1.shortmatch() ? r1.match : Read.toLongMatchString(r1.match));
    if (match == null || quals == null || bases == null) {
      return false;
    }

    int subs = 0;
    int indels = 0;
    for (int qpos = 0, mpos = 0, last = quals.length - 1; mpos < match.length; mpos++) {

      final byte m = match[mpos];
      final byte mprev = match[Tools.max(mpos - 1, 0)];
      final byte mnext = match[Tools.min(mpos + 1, match.length - 1)];

      final byte q1 = quals[qpos];
      final byte b2 = bases[qpos];

      int sub = 0, indel = 0;
      if (m == 'S') {
        sub = 1;
      } else if (m == 'I') {
        indel = 1;
      } else if (m == 'm') {
        if (mprev == 'D' || mnext == 'D') {
          indel = 1;
        }
      } else if (m == 'D') {
        // do nothing
      } else if (m == 'C') {
        // do nothing
      } else {
        throw new RuntimeException(
            "Bad symbol m='"
                + ((char) m)
                + "'\n"
                + new String(match)
                + "\n"
                + new String(bases)
                + "\n");
      }
      subs += sub;
      indels += indel;
      if (q1 >= minq && q1 <= maxq) {
        if (sub > 0 || (indel > 0 && countIndels)) {
          return true;
        }
      }

      if (m != 'D') {
        qpos++;
      }
    }
    return keepPerfect && subs == 0 && indels == 0;
  }
Exemplo n.º 3
0
 /**
  * Builds one consensus read from the per-column base counts of this cluster.
  *
  * <p>For each column, {@code getConsensus} selects a base index. A negative index yields base
  * 'N' with quality 0. Otherwise the quality is the winning base's count minus the combined
  * count of the other three bases, passed through {@code Tools.mid(0, q, 50)}. The arithmetic
  * is carried out in {@code long} so that large counts cannot overflow {@code int}.
  *
  * <p>The resulting read takes its id from the first read in this collection.
  *
  * @return a new read holding the consensus bases and qualities
  */
 public Read consensus() { // TODO: Return single read if only 1.
   final int[][] counts = baseCounts();
   final int width = counts[0].length;
   byte[] bases = new byte[width], quals = new byte[width];
   for (int i = 0; i < width; i++) {
     int x = getConsensus(counts, i);
     if (x < 0) {
       bases[i] = 'N';
       quals[i] = 0;
     } else {
       // Widen before combining: 2 * count in int arithmetic can wrap for very deep columns.
       final long total = (long) counts[0][i] + counts[1][i] + counts[2][i] + counts[3][i];
       final long q = 2L * counts[x][i] - total;
       bases[i] = AminoAcid.numberToBase[x];
       quals[i] = (byte) Tools.mid(0, q, 50);
     }
   }
   Read leftmost = this.get(0);
   Read r = new Read(bases, quals, 0, leftmost.id);
   // TODO: Attach the long pair, and make sure the kmer location is correct.
   return r;
 }
  @Override
  public boolean parseArgument(String arg, String a, String b) {
    //		System.err.println("Calling parseArgument("+arg+","+a+","+b+")");
    if (a.equals("minq")) {
      minq = (int) Tools.parseKMG(b);
      return true;
    } else if (a.equals("maxq")) {
      maxq = (int) Tools.parseKMG(b);
      return true;
    } else if (a.equals("keepperfect")) {
      keepPerfect = Tools.parseBoolean(b);
      return true;
    } else if (a.equals("countindels")) {
      countIndels = Tools.parseBoolean(b);
      return true;
    }

    // There was no match to the argument
    return false;
  }
  /**
   * Links reads at matching indices of two buffers as mates.
   *
   * <p>For every index present in both buffers, the two reads are cross-linked through their
   * {@code mate} fields and the second read is given pair number 1. If {@code buffer2} is
   * longer, its surplus reads are given pair number 0 and appended to {@code buffer1}. Surplus
   * reads in {@code buffer1} are left as they are.
   *
   * @param buffer1 reads from the first input; may be appended to
   * @param buffer2 reads from the second input
   */
  private final void pair(ArrayList<Read> buffer1, ArrayList<Read> buffer2) {
    final int size1 = buffer1.size();
    final int size2 = buffer2.size();
    assert (ALLOW_UNEQUAL_LENGTHS || size1 == size2)
        : "\nThere appear to be different numbers of reads in the paired input files."
            + "\nThe pairing may have been corrupted by an upstream process.  It may be fixable by running repair.sh.";
    final int shared = Tools.min(size1, size2);

    for (int idx = 0; idx < shared; idx++) {
      final Read left = buffer1.get(idx);
      final Read right = buffer2.get(idx);

      assert (left.numericID == right.numericID)
          : "\n"
              + left.numericID
              + ", "
              + right.numericID
              + "\n"
              + left.toText(false)
              + "\n"
              + right.toText(false)
              + "\n";
      assert (left.mate == null)
          : "Please set interleaved=false when using dual input files.\n"
              + left.id
              + "\n"
              + left.mate.id
              + "\n"
              + right.id
              + "\n"
              + producer1
              + "\n"
              + producer2;
      assert (right.mate == null) : "Please set interleaved=false when using dual input files.";

      left.mate = right;
      right.mate = left;

      assert (left.pairnum() == 0);
      right.setPairnum(1);
    }

    // Runs only when buffer2 is the longer list; otherwise shared == size2 and the loop is empty.
    for (int idx = shared; idx < size2; idx++) {
      final Read orphan = buffer2.get(idx);
      orphan.setPairnum(0);
      buffer1.add(orphan);
    }
  }
Exemplo n.º 6
0
  /**
   * Accumulates a quality-weighted tally of each base at each column of the cluster.
   *
   * <p>Each read carries an anchor position in element 1 of the {@code long[]} stored in its
   * {@code obj} field. Reads are laid out so that their anchors coincide; the matrix width is
   * the largest anchor plus the largest remaining length past the anchor. For every base that
   * {@code AminoAcid.baseToNumber} maps to a non-negative index, that base's quality (or 20
   * when the read has no qualities) is added to the corresponding cell.
   *
   * @return a {@code [4][width]} matrix indexed by base number and column
   */
  public int[][] baseCounts() {
    int leftSpan = -1;
    int rightSpan = -1;
    for (Read read : this) {
      final int anchor = (int) ((long[]) read.obj)[1];
      leftSpan = Tools.max(leftSpan, anchor);
      rightSpan = Tools.max(rightSpan, read.length() - anchor);
    }

    final int[][] tally = new int[4][leftSpan + rightSpan];

    for (Read read : this) {
      final int anchor = (int) ((long[]) read.obj)[1];
      final byte[] seq = read.bases;
      final byte[] qual = read.quality;
      // Column at which this read's first base lands.
      final int offset = leftSpan - anchor;

      for (int p = 0; p < seq.length; p++) {
        final int code = AminoAcid.baseToNumber[seq[p]];
        if (code < 0) {
          continue;
        }
        final int weight = (qual == null ? 20 : qual[p]);
        tally[code][offset + p] += weight;
      }
    }
    return tally;
  }
  /**
   * Selects, for each of {@code comparisons} divisors, the kmer of the read with the largest
   * remainder and stores it in {@code kmers}.
   *
   * <p>A forward kmer and a second kmer built from the complement table are rolled along the
   * read; the larger of the two is the candidate at each position. Candidates are only
   * considered once at least {@code k} consecutive bases other than 'N' have been seen. Reads
   * with no bases or fewer than {@code k} bases leave {@code kmers} untouched.
   *
   * <p>The per-divisor best remainders live in a scratch array obtained from {@code local2},
   * which is created on first use and reset to -1 on every call.
   *
   * @param r the read to scan
   * @param kmers output array; slot {@code j} receives the winning kmer for divisor {@code j}
   */
  public void fill(Read r, long[] kmers) {
    final byte[] seq = r.bases;
    if (seq == null || seq.length < k) {
      return;
    }

    long[] best = local2.get();
    if (best == null) {
      best = new long[comparisons];
      local2.set(best);
    }
    Arrays.fill(best, -1);

    long fwd = 0;
    long rev = 0;
    int run = 0;

    for (final byte base : seq) {
      final long code = Dedupe.baseToNumber[base];
      final long comp = Dedupe.baseToComplementNumber[base];
      fwd = ((fwd << 2) | code) & mask;
      rev = (rev >>> 2) | (comp << shift2);

      // An 'N' restarts the count of usable consecutive bases.
      run = (base == 'N' ? 0 : run + 1);
      if (run < k) {
        continue;
      }

      final long candidate = Tools.max(fwd, rev);
      for (int j = 0; j < comparisons; j++) {
        final long remainder = candidate % divisors[j];
        if (remainder > best[j]) {
          best[j] = remainder;
          kmers[j] = candidate;
        }
      }
    }
  }
  /**
   * Splits a SAM file into plus-strand, minus-strand and unmapped outputs.
   *
   * <p>Arguments are positional: input file, then optional plus, minus and unmapped output
   * files, then an optional fifth argument "header" (case-insensitive) which turns on copying
   * of '@' header lines to every open output. Empty lines are skipped. Counts and elapsed time
   * are reported on stderr.
   *
   * @param args input, [plus out], [minus out], [unmapped out], ["header"]
   */
  public static void main(String[] args) {

    final Timer timer = new Timer();

    final String in = args[0];
    final String outF = (args.length > 1 ? args[1] : null);
    final String outR = (args.length > 2 ? args[2] : null);
    final String outU = (args.length > 3 ? args[3] : null);
    if (args.length > 4 && args[4].equalsIgnoreCase("header")) {
      includeHeader = true;
    }

    final ByteFile reader = ByteFile.makeByteFile(in, true, false);

    Tools.testForDuplicateFiles(true, in, outF, outR, outU);
    Tools.testOutputFiles(true, false, false, outF, outR, outU);

    final ByteStreamWriter fStream =
        (outF == null ? null : new ByteStreamWriter(outF, true, false, true));
    final ByteStreamWriter rStream =
        (outR == null ? null : new ByteStreamWriter(outR, true, false, true));
    final ByteStreamWriter uStream =
        (outU == null ? null : new ByteStreamWriter(outU, true, false, true));
    final ByteStreamWriter[] sinks = {fStream, rStream, uStream};

    for (ByteStreamWriter sink : sinks) {
      if (sink != null) {
        sink.start();
      }
    }

    long plus = 0;
    long minus = 0;
    long other = 0;

    byte[] line = reader.nextLine();
    while (line != null) {
      if (line.length > 0) {
        if (line[0] == '@') {
          // Header line: replicate to every open output when requested.
          if (includeHeader) {
            for (ByteStreamWriter sink : sinks) {
              if (sink != null) {
                sink.println(line);
              }
            }
          }
        } else {
          final int flag = SamLine.parseFlagOnly(line);
          final ByteStreamWriter target;
          if (!SamLine.mapped(flag)) {
            target = uStream;
          } else if (SamLine.strand(flag) == 0) {
            target = fStream;
          } else {
            target = rStream;
          }

          if (target != null) {
            target.println(line);
          }

          if (target == uStream && !SamLine.mapped(flag)) {
            other++;
          } else if (SamLine.strand(flag) == 0) {
            plus++;
          } else {
            minus++;
          }
        }
      }
      line = reader.nextLine();
    }
    reader.close();

    for (ByteStreamWriter sink : sinks) {
      if (sink != null) {
        sink.poisonAndWait();
      }
    }

    System.err.println("Total reads:   \t" + (plus + minus + other));
    System.err.println("Plus reads:    \t" + (plus));
    System.err.println("Minus reads:   \t" + (minus));
    System.err.println("Unmapped reads:\t" + (other));

    timer.stop();

    System.err.println("Time:          \t" + timer);
  }
  /**
   * Applies per-chromosome variation files to an input genome build and writes the result as a
   * new genome build.
   *
   * <p>{@code args[0]} is the input file pattern; the first '#' in it is replaced by the
   * chromosome number. Remaining arguments are {@code key=value} options: ingenome, outgenome,
   * minchrom, maxchrom, threads/t, nblocksize, nblocktrigger, staynearref, append/app,
   * overwrite/ow, regen*, and name. Keys are matched case-insensitively; the value of
   * {@code name} keeps its original case. Unrecognized options are reported on stderr.
   *
   * <p>After writing all chromosomes and the genome info, files in the index directory whose
   * names contain ".int2d" or end in ".txt" are deleted.
   *
   * @param args input pattern followed by key=value options
   */
  public static void main(String[] args) {
    System.err.println(
        "Executing "
            + (new Object() {}.getClass().getEnclosingClass().getName())
            + " "
            + Arrays.toString(args)
            + "\n");
    Timer t = new Timer();

    String inPattern = args[0];

    int minChrom = -1;
    int maxChrom = -1;
    int outgenome = -1;
    Data.GENOME_BUILD = -1;
    String name = null;

    for (int i = 1; i < args.length; i++) {
      final String arg = args[i].toLowerCase();
      String[] split = arg.split("=");
      String a = split[0];
      String b = (split.length > 1 ? split[1] : null);

      if (a.equals("ingenome")) {
        Data.setGenome(Integer.parseInt(b));
        if (minChrom == -1) {
          minChrom = 1;
        }
        if (maxChrom == -1) {
          maxChrom = Data.numChroms;
        }
      } else if (a.equals("outgenome")) {
        outgenome = Integer.parseInt(b);
      } else if (a.equals("minchrom")) {
        minChrom = Integer.parseInt(b);
      } else if (a.equals("maxchrom")) {
        maxChrom = Integer.parseInt(b);
      } else if (a.equals("threads") || a.equals("t")) {
        THREADS = Integer.parseInt(b);
      } else if (a.equals("nblocksize")) {
        N_BLOCK_SIZE = Integer.parseInt(b);
      } else if (a.equals("nblocktrigger")) {
        N_BLOCK_TRIGGER = Integer.parseInt(b);
      } else if (a.equals("staynearref")) {
        STAY_NEAR_REF = Tools.parseBoolean(b);
      } else if (a.equals("append") || a.equals("app")) {
        append = ReadStats.append = Tools.parseBoolean(b);
      } else if (a.equals("overwrite") || a.equals("ow")) {
        overwrite = Tools.parseBoolean(b);
      } else if (a.startsWith("regen")) {
        REGEN_N_BLOCKS = Tools.parseBoolean(b);
      } else if (a.equals("name")) {
        // The key never contains '=', so the old startsWith("name=") test could not match, and
        // it assigned the wrong variable. Read the value from the un-lowercased argument so the
        // genome name keeps its case.
        final String[] rawSplit = args[i].split("=");
        name = (rawSplit.length > 1 ? rawSplit[1] : null);
      } else {
        System.err.println("Unknown argument " + arg);
      }
    }

    assert (Data.GENOME_BUILD > -1);
    assert (outgenome > -1);
    if (Data.GENOME_BUILD == outgenome) {
      System.out.println("Warning! Overwriting input genome " + outgenome);
    }

    // Make sure the output directory for chromosome files exists.
    String fname = Data.chromFname(minChrom, outgenome);
    File f = new File(fname.substring(0, fname.lastIndexOf('/')));
    if (!f.exists()) {
      f.mkdirs();
    }

    for (int chrom = minChrom; chrom <= maxChrom; chrom++) {
      String outName = Data.chromFname(chrom, outgenome);
      assert (overwrite || !new File(outName).exists())
          : "Destination " + outName + " already exists.";
      process(inPattern.replaceFirst("#", "" + chrom), outName, chrom);
    }

    FastaToChromArrays2.writeInfo(
        outgenome,
        maxChrom,
        (name == null ? Data.name : name),
        "" + Data.GENOME_BUILD + "_plus_variations",
        false,
        false);

    t.stop();

    {
      String path = IndexMaker4.fname(1, 1, 12, 1);
      int lastSlash = path.lastIndexOf('/');
      path = path.substring(0, lastSlash);
      File dir = new File(path);
      if (dir.exists()) {
        System.out.println("Deleting old index for " + outgenome);
        // listFiles() returns null if dir is not a directory or an I/O error occurs.
        final File[] contents = dir.listFiles();
        if (contents != null) {
          for (File f2 : contents) {
            if (f2.isFile() && (f2.getName().contains(".int2d") || f2.getName().endsWith(".txt"))) {
              f2.delete();
            }
          }
        }
      }
    }

    System.out.println();
    System.out.println("Time: \t" + t);
  }
  /**
   * Applies the variations in one file to one chromosome and writes the modified chromosome.
   *
   * <p>The reference chromosome is copied base by base into a new array. At each variation the
   * reference is copied up to the variation's start and the variation is then applied according
   * to its type (SNP, DELINS, NOCALL, NOREF, INS, DEL). Variations that start before the current
   * reference position are skipped. When {@code STAY_NEAR_REF} is set and the output has
   * drifted at least {@code REF_LIMIT} from the reference coordinate, insertions or deletions
   * are skipped to pull it back. If a skip runs past the last variation, processing stops and
   * the remainder of the reference is copied unchanged.
   *
   * @param inVarsName path of the variation text file, loaded with {@code Varlet.fromTextFile}
   * @param outChromName path the resulting chromosome array is written to
   * @param chrom chromosome number to load and modify
   */
  public static void process(String inVarsName, String outChromName, int chrom) {
    ArrayList<Varlet> vars = Varlet.fromTextFile(inVarsName);
    ChromosomeArray cha = Data.getChromosome(chrom);
    ChromosomeArray chb = new ChromosomeArray(chrom, Gene.PLUS);

    // Next location to read in a
    int aloc = 0;
    // Next location to set in b
    int bloc = 0;

    for (int i = 0; i < vars.size(); i++) {

      Varlet v = vars.get(i);
      // Guard the i == 0 case so building the failure message cannot itself throw.
      assert (v.beginLoc >= aloc)
          : i + "\n" + (i > 0 ? vars.get(i - 1) : null) + "\n" + v + "\n"; // Overlapping variations

      while (v.beginLoc < aloc) { // skip it, for now.
        System.err.print("e");
        i++;
        if (i >= vars.size()) {
          break;
        }
        v = vars.get(i);
      }
      if (i >= vars.size()) {
        // Every remaining variation was skipped; v is stale and must not be applied.
        break;
      }

      if (STAY_NEAR_REF && Tools.absdif(aloc, bloc) >= REF_LIMIT) {
        int dif = v.lengthDif();

        if (aloc < bloc) { // skip insertions
          while (dif > 0) {
            i++;
            if (i >= vars.size()) {
              break;
            }
            v = vars.get(i);
            dif = v.lengthDif();
          }
        } else { // skip deletions
          while (dif < 0) {
            i++;
            if (i >= vars.size()) {
              break;
            }
            v = vars.get(i);
            dif = v.lengthDif();
          }
        }
        if (i >= vars.size()) {
          // Ran out while skipping; v is the last skipped variation, so do not apply it.
          break;
        }
      }

      // Advance to variation's beginning
      while (aloc < v.beginLoc) {
        byte b = cha.get(aloc);
        chb.set(bloc, b);
        aloc++;
        bloc++;
      }

      // Apply variation
      if (v.varType == Variation.SNP) {
        String call = v.call;
        String ref = v.ref;
        if (ref != null && ref.equals("=")) {
          ref = null;
        }
        for (int j = 0; j < call.length(); j++) {
          char c = call.charAt(j);
          if (ref != null) {
            assert (ref.charAt(j) == cha.get(aloc)) : "\n" + i + ", " + v;
          }
          chb.set(bloc, c);
          aloc++;
          bloc++;
        }
      } else if (v.varType == Variation.DELINS) {
        String call = v.call;
        for (int j = 0; j < call.length(); j++) {
          char c = call.charAt(j);
          chb.set(bloc, c);
          bloc++;
        }
        aloc += v.lengthRef();
      } else if (v.varType == Variation.NOCALL) {
        // Do nothing.  But, it should have been removed already.
        if (!foundNocall) {
          System.err.println("*** Warning - found a nocall in input variations ***");
          foundNocall = true;
        }
      } else if (v.varType == Variation.NOREF) {
        String call = v.call;
        for (int j = 0; j < call.length(); j++) {
          char c = call.charAt(j);
          assert (cha.get(aloc) == 'N') : cha.get(aloc);
          chb.set(bloc, c);
          aloc++;
          bloc++;
        }
      } else if (v.varType == Variation.INS) {
        String call = v.call;
        for (int j = 0; j < call.length(); j++) {
          char c = call.charAt(j);
          chb.set(bloc, c);
          bloc++;
        }
      } else if (v.varType == Variation.DEL) {
        int len = v.lengthRef();
        assert (len > 0);
        aloc += len;
      }
    }

    // Finish writing array
    while (aloc < cha.array.length || aloc <= cha.maxIndex) {
      byte c = cha.get(aloc);
      chb.set(bloc, c);
      aloc++;
      bloc++;
    }

    System.out.println("Length Shift for chr" + chrom + ": \t" + (bloc - aloc));

    Data.unload(chrom, true);
    cha = null;

    if (REGEN_N_BLOCKS) {
      chb = regenNBlocks(chb, N_BLOCK_SIZE, N_BLOCK_TRIGGER, N_BLOCK_END_SIZE);
    }

    chb.resize(chb.maxIndex + 1);

    // Can't write in a background thread here because the file is read later.
    ReadWrite.write(chb, outChromName, false);
  }
  /**
   * Reads one or two sequence files through a {@code ConcurrentReadInputStream} and reports the
   * number of reads, the number of bases, the average length and the elapsed time.
   *
   * <p>{@code args[0]} is the first input. {@code args[1]} is taken as the second input unless
   * it is missing, equals "null" (case-insensitive) or contains '='. With a second input,
   * interleaving detection is disabled; without one, input is forced to be read as interleaved.
   * All remaining arguments are {@code key=value} options handed to {@code Parser}, plus
   * {@code reads}/{@code maxreads*} to cap the number of reads.
   *
   * @param args first input, optional second input, then options
   * @throws RuntimeException if an option is not recognized
   */
  public static void main(String[] args) {
    String in1 = args[0];
    String in2 =
        (args.length < 2 || args[1].equalsIgnoreCase("null") || args[1].contains("=")
            ? null
            : args[1]);
    if (in2 != null) {
      assert (!in1.equalsIgnoreCase(in2));
      FASTQ.TEST_INTERLEAVED = false;
    } else {
      FASTQ.TEST_INTERLEAVED = true;
      FASTQ.FORCE_INTERLEAVED = true;
    }

    // A second argument without '=' is the positional second input (or the literal "null"),
    // not an option; feeding it to the option parser would end in "Unknown parameter".
    final int firstOption = (args.length > 1 && !args[1].contains("=") ? 2 : 1);

    long maxReads = -1;
    for (int i = firstOption; i < args.length; i++) {
      final String arg = args[i];
      final String[] split = arg.split("=");
      String a = split[0].toLowerCase();
      String b = (split.length > 1 ? split[1] : "true");

      if (Parser.isJavaFlag(arg)) {
        // jvm argument; do nothing
      } else if (Parser.parseZip(arg, a, b)) {
        // do nothing
      } else if (Parser.parseQuality(arg, a, b)) {
        // do nothing
      } else if (Parser.parseFasta(arg, a, b)) {
        // do nothing
      } else if (a.equals("reads") || a.startsWith("maxreads")) {
        maxReads = Tools.parseKMG(b);
      } else {
        throw new RuntimeException("Unknown parameter " + args[i]);
      }
    }

    Parser.processQuality();

    assert (FastaReadInputStream.settingsOK());
    Timer t = new Timer();

    ConcurrentReadInputStream cris = getReadInputStream(maxReads, false, true, in1, in2);
    System.out.println("Fetched " + cris.getClass().getName());
    {
      Object[] p = cris.producers();
      System.out.print("Producers: ");
      String comma = "";
      for (Object o : p) {
        System.out.print(comma + (o == null ? "null" : o.getClass().getName()));
        comma = ", ";
      }
      System.out.println();
    }
    boolean paired = cris.paired();
    System.out.println("paired=" + paired);
    cris.start(); // 4567

    ListNum<Read> ln = cris.nextList();
    ArrayList<Read> reads = (ln != null ? ln.list : null);

    if (reads != null && !reads.isEmpty()) {
      Read r = reads.get(0);
      assert ((r.mate != null) == paired);
    }

    long readCount = 0;
    long baseCount = 0;

    while (reads != null && reads.size() > 0) {

      for (Read r : reads) {
        if (r == null) {
          // Check before touching r.mate; the old code dereferenced r first.
          continue;
        }
        readCount++;
        if (r.bases != null) {
          baseCount += r.length();
        }

        final Read r2 = r.mate;
        if (r2 != null) {
          readCount++;
          if (r2.bases != null) {
            baseCount += r2.length();
          }
        }
      }
      cris.returnList(ln.id, ln.list.isEmpty());
      ln = cris.nextList();
      reads = (ln != null ? ln.list : null);
    }
    System.err.println("Finished reading");
    // The loop also ends when nextList() returned null or a list-less ListNum.
    if (ln != null) {
      cris.returnList(ln.id, ln.list == null || ln.list.isEmpty());
    }

    cris.close();
    t.stop();

    System.out.println("Reads:      \t" + readCount);
    System.out.println("Bases:      \t" + baseCount);
    System.out.println("Avg Length: \t" + String.format("%.2f", baseCount * 1.0 / readCount));
    System.out.println("Time:      \t" + t);
  }