public static ChromosomeArray regenNBlocks( ChromosomeArray cha, int blocksize, int trigger, int endsize) { ChromosomeArray chb = new ChromosomeArray(cha.chromosome, cha.strand, cha.minIndex, cha.maxIndex); chb.maxIndex = -1; int aloc = 0; int bloc = 0; int ns = 0; // Process start while (cha.get(aloc) == 'N') { chb.set(bloc, 'N'); ns++; aloc++; bloc++; } while (ns < endsize) { chb.set(bloc, 'N'); ns++; bloc++; } ns = 0; // Process middle while (aloc <= cha.maxIndex) { byte b = cha.get(aloc); if (b == 'N') { ns++; } else { if (ns >= trigger) { while (ns < blocksize) { chb.set(bloc, 'N'); bloc++; ns++; } } ns = 0; } chb.set(bloc, b); aloc++; bloc++; } // Process end ns = 0; for (int i = chb.maxIndex; i >= 0; i--) { if (chb.get(i) != 'N') { break; } } while (ns < endsize) { chb.set(chb.maxIndex + 1, 'N'); ns++; } return chb; }
/** * @param replaceFirst * @param chromFname * @param chrom */ public static void process(String inVarsName, String outChromName, int chrom) { ArrayList<Varlet> vars = Varlet.fromTextFile(inVarsName); ChromosomeArray cha = Data.getChromosome(chrom); ChromosomeArray chb = new ChromosomeArray(chrom, Gene.PLUS); // Next location to read in a int aloc = 0; // Next location to set in b int bloc = 0; for (int i = 0; i < vars.size(); i++) { Varlet v = vars.get(i); assert (v.beginLoc >= aloc) : i + "\n" + vars.get(i - 1) + "\n" + v + "\n"; // Overlapping variations while (v.beginLoc < aloc) { // skip it, for now. System.err.print("e"); i++; if (i >= vars.size()) { break; } v = vars.get(i); } if (STAY_NEAR_REF && Tools.absdif(aloc, bloc) >= REF_LIMIT) { int dif = v.lengthDif(); if (aloc < bloc) { // skip insertions while (dif > 0) { // System.err.print("i"); i++; if (i >= vars.size()) { break; } v = vars.get(i); dif = v.lengthDif(); } } else { // skip deletions while (dif < 0) { // System.err.print("d"); i++; if (i >= vars.size()) { break; } v = vars.get(i); dif = v.lengthDif(); } } } // Advance to variation's beginning while (aloc < v.beginLoc) { byte b = cha.get(aloc); chb.set(bloc, b); aloc++; bloc++; } // Apply variation if (v.varType == Variation.SNP) { String call = v.call; String ref = v.ref; if (ref != null && ref.equals("=")) { ref = null; } for (int j = 0; j < call.length(); j++) { char c = call.charAt(j); if (ref != null) { assert (ref.charAt(j) == cha.get(aloc)) : "\n" + i + ", " + v; } chb.set(bloc, c); aloc++; bloc++; } } else if (v.varType == Variation.DELINS) { String call = v.call; for (int j = 0; j < call.length(); j++) { char c = call.charAt(j); chb.set(bloc, c); bloc++; } aloc += v.lengthRef(); } else if (v.varType == Variation.NOCALL) { // Do nothing. But, it should have been removed already. if (!foundNocall) { System.err.println("*** Warning - found a nocall in input variations ***"); foundNocall = true; } } else if (v.varType == Variation.NOREF) { String call = v.call; for (int j = 0; j < call.length(); j++) { char c = call.charAt(j); assert (cha.get(aloc) == 'N') : cha.get(aloc); chb.set(bloc, c); aloc++; bloc++; } } else if (v.varType == Variation.INS) { String call = v.call; for (int j = 0; j < call.length(); j++) { char c = call.charAt(j); chb.set(bloc, c); bloc++; } } else if (v.varType == Variation.DEL) { int len = v.lengthRef(); assert (len > 0); aloc += len; } } // Finish writing array while (aloc < cha.array.length || aloc <= cha.maxIndex) { byte c = cha.get(aloc); chb.set(bloc, c); aloc++; bloc++; } System.out.println("Length Shift for chr" + chrom + ": \t" + (bloc - aloc)); Data.unload(chrom, true); cha = null; if (REGEN_N_BLOCKS) { chb = regenNBlocks(chb, N_BLOCK_SIZE, N_BLOCK_TRIGGER, N_BLOCK_END_SIZE); } chb.resize(chb.maxIndex + 1); // Can't do this because it is read later // if(THREADS==1){ReadWrite.writeObjectInThread(cac, outChromName);} // else{ReadWrite.write(cac, outChromName);} ReadWrite.write(chb, outChromName, false); }