private void updateProgress() {
  if (mBasicProgress) {
    synchronized (mJobs) {
      mTotalJobsFinished++;
      final String message = mThreadPoolName + ": " + mTotalJobsFinished + "/" + mTotalJobs + " Jobs Finished";
      Diagnostic.progress(message);
      Diagnostic.developerLog(message);
    }
  }
}
/**
 * Alternative constructor taking an already loaded index file.
 *
 * @param directory directory containing the SDF
 * @param indexFile index file
 * @param loadNames should we load names
 * @param loadFullNames should we load full names
 * @param region region to restrict to
 * @throws IOException if an I/O error occurs
 */
CompressedMemorySequencesReader2(File directory, IndexFile indexFile, boolean loadNames, boolean loadFullNames, LongRange region) throws IOException {
  mIndexFile = indexFile;
  mRegion = SequencesReaderFactory.resolveRange(indexFile, region);
  mStart = mRegion.getStart();
  mEnd = mRegion.getEnd();
  mNumberSequences = mEnd - mStart;
  // Fail fast before loading sequence data if the region holds more sequences than can be indexed
  if (mNumberSequences > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("Too many sequences in region: " + region + ", maximum is: " + Integer.MAX_VALUE);
  }
  mData = DataInMemory.loadDelayQuality(directory, indexFile, DataFileIndex.loadSequenceDataFileIndex(indexFile.dataIndexVersion(), directory), mStart, mEnd);
  mDirectory = directory;
  if (loadNames && mIndexFile.hasNames()) {
    loadNames();
    loadNameSuffixes(loadFullNames, mIndexFile.hasSequenceNameSuffixes());
  }
  final StringBuilder sb = new StringBuilder("CMSR2 statistics");
  sb.append(LS);
  this.infoString(sb);
  Diagnostic.userLog(sb.toString());
}
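/**
 * A minimal usage sketch for the constructor above. The IndexFile construction
 * and the concrete range values here are assumptions for illustration only,
 * not taken from this file.
 */
private static CompressedMemorySequencesReader2 exampleOpenRegion(final File sdfDir) throws IOException {
  final IndexFile index = new IndexFile(sdfDir); // assumed: the index can be loaded from the SDF directory
  final LongRange region = new LongRange(0, 1000); // restrict to the first 1000 sequences
  return new CompressedMemorySequencesReader2(sdfDir, index, true, false, region);
}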
@Override
public void close() throws IOException {
  mReader.close();
  Diagnostic.developerLog("There were " + mDoubleFetched + " tabixed records double-fetched due to overlapping blocks");
}
/**
 * Reads a set of read group priors.
 *
 * @param remap optional read group id remapping (used to merge read group ids)
 * @param files the files containing the priors.
 * @return a map from read group name to a ReadGroupStats object.
 * @exception IOException if an error occurs.
 */
static Map<String, ReadGroupStats> loadReadGroupStats(Map<String, String> remap, File... files) throws IOException {
  final HashMap<String, ReadGroupStats> rgStats = new HashMap<>();
  Diagnostic.userLog("Loading read group statistics");
  for (final File statsFile : files) {
    try (BufferedReader br = new BufferedReader(new FileReader(statsFile))) {
      String line;
      while ((line = br.readLine()) != null) {
        if (line.length() > 0) {
          if (line.startsWith("#")) {
            if (line.startsWith("#Version") && !line.contains(ReadGroupStatsCalculator.VERSION)) {
              throw new NoTalkbackSlimException("Unsupported rgstats version: " + line + " - rerun svprep");
            }
          } else {
            try {
              final ReadGroupStats stats = new ReadGroupStats(remap, line);
              if (rgStats.containsKey(stats.id())) {
                rgStats.get(stats.id()).add(stats);
              } else {
                rgStats.put(stats.id(), stats);
              }
            } catch (final IllegalArgumentException e) {
              throw new IOException(e.getMessage());
            }
          }
        }
      }
      if (rgStats.isEmpty()) {
        throw new IOException("No read group statistics contained in file " + statsFile);
      }
    }
  }
  for (final ReadGroupStats stats : rgStats.values()) {
    stats.calculate();
    Diagnostic.userLog(stats.toString());
    if (stats.detectOverflow()) {
      throw new NoTalkbackSlimException("Overflow detected in read group statistics calculation for read group: " + stats.id());
    } else if (!stats.isValid()) {
      throw new NoTalkbackSlimException("Invalid read group statistics for read group: " + stats.id());
    }
  }
  return rgStats;
}
public void testArrayPrereadNames() {
  Diagnostic.setLogStream();
  final String seqString = "acgtcacgtcacgtcacgtcacgtcacgtcacgtc";
  new MyMemorySequencesReader(new byte[][] {DnaUtils.encodeArray(seqString.getBytes())}, new String[] {"seq1"}, new long[] {35}, 35, 35, SequenceType.DNA);
}
/**
 * Load the names if they haven't already been loaded.
 *
 * @throws IOException if an I/O related error occurs
 */
private void loadNames() throws IOException {
  mNames = new PrereadNames(mDirectory, mRegion, false);
  if (mIndexFile.getVersion() >= IndexFile.SEPARATE_CHECKSUM_VERSION && mRegion.getStart() == 0 && mRegion.getEnd() == mIndexFile.getNumberSequences()) {
    if (mNames.calcChecksum() != mIndexFile.getNameChecksum()) {
      throw new CorruptSdfException("Sequence names failed checksum - SDF may be corrupt: \"" + mDirectory + "\"");
    } else {
      Diagnostic.developerLog("Sequence names passed checksum");
    }
  }
}
/**
 * Constructor for a thread pool. Basic progress output is enabled by default.
 *
 * @param numberThreads maximum number of threads that will be used.
 * @param subname textual label to use in threads.
 * @param logLifecycleEvents whether to log thread life cycle events
 */
public SimpleThreadPool(final int numberThreads, final String subname, boolean logLifecycleEvents) {
  mThreadPoolName = subname;
  assert numberThreads > 0;
  mMaxThreads = numberThreads;
  mQueueThread = new QueueThread("SimpleThreadPool-" + mThreadPoolName + "-Queue", subname, logLifecycleEvents);
  mQueueThread.setDaemon(true);
  mQueueThread.start();
  Diagnostic.developerLog(mThreadPoolName + ": Starting SimpleThreadPool with maximum " + numberThreads + " threads");
}
public void testBad() {
  Diagnostic.setLogStream();
  final SAMRecord rec = new SAMRecord(new SAMFileHeader());
  rec.setReadString("TATTAGGATTGAGACTGGTAAAATGGNCCACCAAG");
  rec.setBaseQualityString("/725361840-525251.68,0,.52!222254/2");
  final VariantParams params = VariantParams.builder().create();
  try {
    new AlignmentEnvironmentCG(new VariantAlignmentRecord(rec), params, MachineErrorParams.builder().machine(MachineType.COMPLETE_GENOMICS).create(), null);
    fail();
  } catch (final NoTalkbackSlimException e) {
    assertTrue(e.getMessage(), e.getMessage().startsWith("Invalid CG alignment."));
  }
}
private void loadNameSuffixes(boolean attemptLoad, boolean suffixExists) throws IOException {
  mNameSuffixes = attemptLoad && suffixExists
      ? new PrereadNames(mDirectory, mRegion, true)
      : new EmptyStringPrereadNames(mEnd - mStart);
  if (attemptLoad && suffixExists) {
    if (mRegion.getStart() == 0 && mRegion.getEnd() == mIndexFile.getNumberSequences()) {
      if (mNameSuffixes.calcChecksum() != mIndexFile.getNameSuffixChecksum()) {
        throw new CorruptSdfException("Sequence name suffixes failed checksum - SDF may be corrupt: \"" + mDirectory + "\"");
      } else {
        Diagnostic.developerLog("Sequence name suffixes passed checksum");
      }
    }
  }
}
/**
 * Reads a set of read group name remappings from source name to destination name.
 * This is used to merge multiple read groups into one. Format is tab-separated
 * lines consisting of input read group name, output read group name.
 *
 * @param relabelFile the file containing the group name mappings.
 * @return a map from input read group name to output read group name.
 * @exception IOException if an error occurs.
 */
static Map<String, String> loadRelabelFile(File relabelFile) throws IOException {
  final HashMap<String, String> remap = new HashMap<>();
  Diagnostic.userLog("Loading read group relabelling file");
  try (BufferedReader br = new BufferedReader(new FileReader(relabelFile))) {
    String line;
    while ((line = br.readLine()) != null) {
      if (!line.startsWith("#") && line.length() > 0) {
        final String[] parts = line.split("\t");
        if (parts.length != 2) {
          throw new IOException("Expected input_name<tab>output_name on line: " + line);
        }
        remap.put(parts[0], parts[1]);
      }
    }
    if (remap.isEmpty()) {
      throw new IOException("No read group relabellings contained in file " + relabelFile);
    }
  }
  return remap;
}
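/**
 * Illustrative sketch tying the two loaders together: an optional relabel file
 * produces the remap that merges read groups while the stats files are read.
 * The file names used here are hypothetical.
 */
private static Map<String, ReadGroupStats> exampleLoadStats() throws IOException {
  final Map<String, String> remap = loadRelabelFile(new File("rg-relabel.txt")); // input_name<TAB>output_name per line
  return loadReadGroupStats(remap, new File("sample1.rgstats"), new File("sample2.rgstats"));
}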
private void populateNext(boolean force) throws IOException {
  final int previousStart = mNextAlignmentStart;
  final int previousTemplateId = mNextTemplateId;
  mNextRecord = null;
  if (force) {
    advanceSubIterator();
  }
  while (mCurrentOffset <= mOffsets.size()) {
    if (!mBuffered && !mReader.hasNext()) { // Only happens when the stream is exhausted, so effectively just closes things out.
      advanceSubIterator();
    } else {
      if (mBuffered) {
        mBuffered = false;
      } else {
        mReader.next();
      }
      final String refName = mReader.getReferenceName();
      final Integer refId = mSequenceLookup.get(refName); // Note that we cannot rely on mReader.getReferenceId in this scenario, as that is built up incrementally
      if (refId == null) {
        throw new IOException("Tabixed input contained a sequence name not found in the corresponding index: " + refName);
      }
      if (refId > mCurrentTemplate) { // Current offset has exceeded the region and the block overlapped the next template
        mBuffered = true;
        advanceSubIterator();
      } else {
        if (refId < mCurrentTemplate) { // The current block may occasionally return records from the previous template if the block overlaps
          //Diagnostic.developerLog("Ignoring record from earlier template at " + mReader.getReferenceName() + ":" + (mReader.getStartPosition() + 1) + " (" + refId + "<" + mCurrentTemplate + ")");
          continue;
        }
        final int alignmentStart = mReader.getStartPosition();
        final int alignmentEnd = alignmentStart + mReader.getLengthOnReference();
        if (alignmentEnd <= mCurrentRegion.getStart()) { // Before the region
          //Diagnostic.developerLog("Ignoring record from earlier than start at " + mReader.getReferenceName() + ":" + (mReader.getStartPosition() + 1));
          continue;
        }
        if (alignmentStart <= mPreviousAlignmentStart) { // This record would already have been returned by an earlier region
          //Diagnostic.developerLog("Ignoring record from earlier block at " + mReader.getReferenceName() + ":" + (alignmentStart + 1));
          mDoubleFetched++;
          if (mDoubleFetched % 100000 == 0) {
            Diagnostic.developerLog("Many double-fetched records noticed at " + mReader.getReferenceName() + ":" + (alignmentStart + 1) + " in region " + mCurrentRegion + " (skipping through to " + mPreviousAlignmentStart + ")");
          }
          continue;
        }
        if (alignmentStart >= mCurrentRegion.getEnd()) { // Past the current region; advance the iterator and record the furthest we got
          if (previousStart != Integer.MIN_VALUE && previousTemplateId == mCurrentTemplate) {
            mPreviousAlignmentStart = previousStart;
          } else {
            mPreviousAlignmentStart = Integer.MIN_VALUE;
          }
          mBuffered = true;
          advanceSubIterator();
          continue;
        }
        mNextRecord = mReader.getRecord();
        mNextTemplateId = mCurrentTemplate;
        mNextAlignmentStart = alignmentStart;
        break;
      }
    }
  }
}
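/**
 * The key invariant in populateNext above: because adjacent tabix blocks can
 * overlap, a record is only emitted when it starts strictly after the furthest
 * start position already returned for the current template. A stripped-down
 * sketch of that filter over plain start positions (illustrative only):
 */
private static int exampleCountEmitted(final int[] starts, final int previousAlignmentStart) {
  int emitted = 0;
  for (final int start : starts) {
    if (start <= previousAlignmentStart) {
      continue; // double-fetched: an earlier, overlapping block already returned this record
    }
    emitted++;
  }
  return emitted;
}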
@Override
public void run() {
  try {
    Diagnostic.developerLog(mSubName + ": Started");
    while (mProcessJobs) {
      boolean localBusy = false;
      synchronized (mJobs) {
        for (final WorkerThread t : mThreads) {
          if (!t.hasJob()) {
            if (!mJobs.isEmpty()) {
              t.enqueueJob(mJobs.remove());
              localBusy = true;
              if (mLogLifecycleEvents) {
                Diagnostic.developerLog(mSubName + ": New Job Started by thread: " + t.getName() + " - " + mJobs.size() + " Jobs Left Queued");
              }
            }
          } else {
            localBusy = true;
          }
        }
        while (!mJobs.isEmpty() && mThreads.size() < mMaxThreads) {
          final WorkerThread t = new WorkerThread(mSubName + "-" + mThreads.size(), mJobs);
          mThreads.add(t);
          if (mLogLifecycleEvents) {
            Diagnostic.developerLog(mSubName + ": Worker Thread Created - " + t.getName() + " - " + mThreads.size() + "/" + mMaxThreads + " Threads");
          }
          t.enqueueJob(mJobs.remove());
          t.start();
          localBusy = true;
          if (mLogLifecycleEvents) {
            Diagnostic.developerLog(mSubName + ": New Job Started by thread: " + t.getName() + " - " + mJobs.size() + " Jobs Left Queued");
          }
        }
        mBusy = localBusy;
        mJobs.notifyAll();
        try {
          if (mProcessJobs) {
            mJobs.wait(NOT_DONE_SLEEP_TIME);
          }
        } catch (final InterruptedException e) {
          // don't care
        }
      }
    }
  } catch (final Throwable t) {
    mThrown = t;
    mProcessJobs = false;
    ProgramState.setAbort();
  } finally {
    for (final WorkerThread t : mThreads) {
      t.die();
    }
    mBusy = false;
    synchronized (mJobs) {
      mJobs.clear();
    }
    Diagnostic.developerLog(mSubName + ": Finished");
    synchronized (this) {
      mQueueDone = true;
      notifyAll();
    }
  }
}
/**
 * Enable the basic progress output with the total number of jobs that this thread pool will process.
 *
 * @param totalJobs the total number of jobs this thread pool will be processing.
 */
public void enableBasicProgress(long totalJobs) {
  mTotalJobs = totalJobs;
  mBasicProgress = true;
  Diagnostic.progress(mThreadPoolName + ": Starting " + mTotalJobs + " Jobs");
}
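/**
 * Illustrative usage of the basic-progress API. This sketch assumes the pool
 * exposes execute(IORunnable) and terminate() for job submission and shutdown;
 * only the constructor, enableBasicProgress and updateProgress appear in this excerpt.
 */
private static void exampleRunJobs(final List<IORunnable> jobs) throws IOException {
  final SimpleThreadPool pool = new SimpleThreadPool(4, "Example", false);
  pool.enableBasicProgress(jobs.size()); // emits "Example: Starting N Jobs"
  for (final IORunnable job : jobs) {
    pool.execute(job); // assumed submission method
  }
  pool.terminate(); // assumed: waits for all jobs; updateProgress() reports each one as it finishes
}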
@Override
public void setUp() throws IOException {
  Diagnostic.setLogStream();
  mDir = FileHelper.createTempDirectory();
}
public void testFilterUnmated() throws IOException {
  final ByteArrayOutputStream log = new ByteArrayOutputStream();
  try (PrintStream prLog = new PrintStream(log)) {
    Diagnostic.setLogStream(prLog);
    final int numReads = 100;
    final MapQScoringReadBlocker blocker = new MapQScoringReadBlocker(numReads, 2);
    blocker.increment(1, 3);
    blocker.increment(20, 4);
    blocker.increment(20, 4);
    blocker.increment(20, 4); // read 20 is blocked for score=4
    blocker.increment(3, 1);
    blocker.increment(3, 1);
    blocker.increment(3, 1); // read 3 is blocked for score=1
    blocker.increment(3, 0);
    blocker.increment(3, 0); // read 3 is just not blocked for score=0
    final File dir = FileUtils.createTempDir("test", "unmatedSamFilter");
    OutputStream out = null;
    try {
      final File in1 = File.createTempFile("sam", "_1.gz", dir);
      writeTempFile(in1);
      final File outFile = File.createTempFile("out", ".gz", dir);
      out = new GZIPOutputStream(new FileOutputStream(outFile));
      final StatusListener listener = new StatusListener(numReads);
      final ReadBlocker freqBlocker = new ReadBlocker(numReads, 2);
      freqBlocker.increment(66);
      freqBlocker.increment(66);
      final MockSequencesReader msr = new MockSequencesReader(SequenceType.DNA) {
        @Override
        public PrereadType getPrereadType() {
          return PrereadType.UNKNOWN;
        }

        @Override
        public boolean hasQualityData() {
          return true;
        }

        @Override
        public int read(long sequenceIndex, byte[] dataOut) {
          dataOut[0] = 1;
          if (sequenceIndex == 3) {
            dataOut[1] = 3;
            dataOut[2] = 3;
          } else {
            dataOut[1] = 3;
            dataOut[2] = 2;
          }
          dataOut[3] = 4;
          return 4;
        }

        @Override
        public int readQuality(final long sequenceIndex, final byte[] dest) {
          dest[0] = dest[1] = dest[2] = dest[3] = '<' - 33;
          return 4;
        }
      };
      final SingleEndSamResultsFilter filter = new SingleEndSamResultsFilter(blocker, freqBlocker, listener, 0, msr, null, false);
      assertEquals("Alignment", filter.getName());
      filter.filterConcat(makeHeader(), out, null, null, mTemplateReader, false, in1);
      out.close();
      final String contents = FileHelper.gzFileToString(outFile);
      // System.out.println("contents=" + contents);
      assertTrue(TestUtils.sameLines(SAM_UNMATED_EXPECTED, TestUtils.stripSAMHeader(contents), false));
      // now check that the listener has been updated correctly.
      for (int read = 0; read < numReads; read++) {
        final int expect;
        switch (read) {
          case 1:
          case 3:
            expect = ReadStatusTracker.UNMATED_FIRST;
            break;
          default:
            expect = 0;
            break;
        }
        assertEquals("readId=" + read, expect, listener.getStatus(read));
      }
    } finally {
      if (out != null) {
        out.close();
      }
      assertTrue(FileHelper.deleteAll(dir));
    }
  } finally {
    Diagnostic.setLogStream();
  }
  final String logString = log.toString();
  // System.err.println(logString);
  TestUtils.containsAll(logString, "Alignment SAM filter outputs 2/5 records");
}
/**
 * @param calibrator the mapping calibration stats to use when computing coverage
 * @param defaultSequenceLengths map of sequence names to sequence length, used if not already set in the calibrator
 * @param readGroupToSampleId read group to sample id
 * @param restriction a region restriction, may be null
 */
public CalibratedPerSequenceExpectedCoverage(Calibrator calibrator, Map<String, Integer> defaultSequenceLengths, Map<String, String> readGroupToSampleId, RegionRestriction restriction) {
  mSequenceSampleCoverages = new HashMap<>();
  mSumCoverages = new HashMap<>();
  mSamples = Collections.unmodifiableSet(new HashSet<>(readGroupToSampleId.values()));
  final Map<String, Integer> sequenceLengths = calibrator.hasLengths() ? calibrator.getSequenceLengths() : defaultSequenceLengths;
  if (calibrator.getCovariateIndex(CovariateEnum.SEQUENCE) == -1) {
    // No per-sequence separation in calibration data, calculate per-genome coverage level
    long length = 0;
    for (final Map.Entry<String, Integer> entry : sequenceLengths.entrySet()) {
      length += entry.getValue();
    }
    final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
    findIndividualGlobalCoverage(calibrator, readGroupToSampleId, local);
    double currentMax = 0;
    double currentSum = 0;
    for (final Map.Entry<String, HashMap<String, CalibrationStats>> e : local.entrySet()) {
      final HashMap<String, CalibrationStats> map = e.getValue();
      if (map.containsKey(DUMMY_SEQ)) {
        final double currentCov = (length == 0) ? 0 : (double) map.get(DUMMY_SEQ).getTotalLength() / length;
        final String sampleName = e.getKey();
        Diagnostic.userLog("Average coverage for sample " + sampleName + " is " + Utils.realFormat(currentCov, 2));
        for (final Map.Entry<String, Integer> entry : sequenceLengths.entrySet()) {
          final String seqName = entry.getKey();
          Map<String, Double> samples = mSequenceSampleCoverages.get(seqName);
          if (samples == null) {
            samples = new HashMap<>();
            mSequenceSampleCoverages.put(seqName, samples);
          }
          samples.put(sampleName, currentCov);
        }
        currentSum += currentCov;
        if (currentCov > currentMax) {
          currentMax = currentCov;
        }
      }
    }
    Diagnostic.userLog("Average combined coverage is " + Utils.realFormat(currentSum, 2));
    for (final Map.Entry<String, Integer> entry : sequenceLengths.entrySet()) {
      final String seqName = entry.getKey();
      mSumCoverages.put(seqName, currentSum);
    }
  } else {
    // Per-sequence separation is in calibration data, calculate per-sequence coverage level
    final Map<String, HashMap<String, CalibrationStats>> local = new HashMap<>();
    findIndividualPerSequenceCoverage(calibrator, sequenceLengths, readGroupToSampleId, local, restriction);
    for (final Map.Entry<String, Integer> entry : sequenceLengths.entrySet()) {
      final String seqName = entry.getKey();
      if (restriction != null && !seqName.equals(restriction.getSequenceName())) {
        continue;
      }
      final int seqLength = entry.getValue();
      double currentMax = 0;
      double currentSum = 0;
      for (final Map.Entry<String, HashMap<String, CalibrationStats>> e : local.entrySet()) {
        final HashMap<String, CalibrationStats> map = e.getValue();
        if (map.containsKey(seqName)) {
          final double currentCov = (seqLength == 0) ? 0 : (double) map.get(seqName).getTotalLength() / seqLength;
          final String sampleName = e.getKey();
          Diagnostic.userLog("Average coverage across sequence " + seqName + " for sample " + sampleName + " is " + Utils.realFormat(currentCov, 2));
          Map<String, Double> samples = mSequenceSampleCoverages.get(seqName);
          if (samples == null) {
            samples = new HashMap<>();
            mSequenceSampleCoverages.put(seqName, samples);
          }
          samples.put(sampleName, currentCov);
          currentSum += currentCov;
          if (currentCov > currentMax) {
            currentMax = currentCov;
          }
        }
      }
      Diagnostic.userLog("Average combined coverage for sequence " + seqName + " is " + Utils.realFormat(currentSum, 2));
      mSumCoverages.put(seqName, currentSum);
    }
  }
}
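/**
 * The per-sample figure computed above reduces to a simple ratio: total aligned
 * bases divided by sequence length, guarding the zero-length case exactly as
 * the constructor does. A minimal sketch (names here are illustrative):
 */
private static double exampleCoverage(final long totalAlignedLength, final int sequenceLength) {
  return sequenceLength == 0 ? 0 : (double) totalAlignedLength / sequenceLength;
}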
public void testValidator() throws IOException {
  Diagnostic.setLogStream();
  final MemoryPrintStream err = new MemoryPrintStream();
  final CliDiagnosticListener listener = new CliDiagnosticListener(err.printStream());
  Diagnostic.addListener(listener);
  final File tempDir = FileHelper.createTempDirectory();
  try {
    final CFlags flags = new CFlags("PhyloTest", TestUtils.getNullPrintStream(), err.printStream());
    SimilarityCli.initFlags(flags);
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", "ba", "-i", "humbug"}, err, "Only set one of --input or --input-list-file");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-i", "humbug"}, err, "The specified SDF, \"humbug\", does not exist.");
    final File fakePaired = new File(tempDir, "fakePaired");
    assertTrue(fakePaired.mkdir());
    final File left = new File(fakePaired, "left");
    assertTrue(left.mkdir());
    assertTrue(new File(fakePaired, "right").mkdir());
    checkErrorMessage(flags, new String[] {"-o", "blah", "-i", fakePaired.getPath()}, err, "The specified SDF, \"" + fakePaired.getPath() + "\", is a paired end SDF.");
    checkErrorMessage(flags, new String[] {"-o", "blah"}, err, "Must set one of --input or --input-list-file");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", "ba"}, err, "The specified list file, \"ba\", does not exist.");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakePaired.getPath()}, err, "The specified list file,", "\"" + fakePaired.getPath() + "\",", "directory.");
    final File fakeList = new File(tempDir, "fakeList.txt");
    assertTrue(fakeList.createNewFile());
    checkErrorMessage(flags, new String[] {"-o", fakePaired.getPath(), "-I", fakeList.getPath()}, err, "The directory", "\"" + fakePaired.getPath() + "\"", "already exists.");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakeList.getPath(), "-w", "-1"}, err, "The specified flag \"--word\" has invalid value \"-1\". It should be greater than or equal to \"1\".");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakeList.getPath(), "-w", "0"}, err, "The specified flag \"--word\" has invalid value \"0\". It should be greater than or equal to \"1\".");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakeList.getPath(), "-w", "33"}, err, "The specified flag \"--word\" has invalid value \"33\". It should be less than or equal to \"32\".");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakeList.getPath(), "--max-reads", "0"}, err, "The --max-reads must be greater than 0");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-I", fakeList.getPath(), "-w", "20", "-s", "20"}, err);
    checkErrorMessage(flags, new String[] {"-o", "blah", "-i", left.getPath(), "--max-reads", "1"}, err, "Only set --max-reads when using --input-list-file");
    checkErrorMessage(flags, new String[] {"-o", "blah", "-i", left.getPath()}, err);
  } finally {
    Diagnostic.removeListener(listener);
    err.close();
    assertTrue(FileHelper.deleteAll(tempDir));
  }
}