/**
 * Verifies that every target in the given collection carries a non-empty name.
 *
 * <p>A target is considered unnamed when its name is {@code null} or the empty string. When at
 * least one such target exists, the error message lists the locations of at most the first 10
 * of them.
 *
 * @param result the query target collection.
 * @throws UserException if there are some targets with no name in {@code result}.
 */
private void checkAllTargetsHaveName(TargetCollection<Target> result) {
  final boolean everyTargetNamed =
      result
          .targets()
          .stream()
          .noneMatch(target -> target.getName() == null || target.getName().equals(""));
  if (everyTargetNamed) {
    return;
  }

  // Resolved before scanning for the offending targets, matching the original argument order.
  final String requestedInfoName = targetOutInfo.name();
  final String unnamedLocations =
      result
          .targets()
          .stream()
          .filter(target -> target.getName() == null || target.getName().equals(""))
          .limit(10)
          .map(target -> result.location(target).toString())
          .collect(Collectors.joining(", "));

  throw new UserException(
      String.format(
          "Target output info requested '%s' requires that each target has a designated unique name/id but there are some with no names: %s",
          requestedInfoName,
          unnamedLocations));
}
@Override public void onTraversalStart() { sampleCollection = new SampleCollection(getHeaderForReads()); logger.log(Level.INFO, "Reading targets locations from intervals..."); targetCollection = resolveTargetCollection(); // Initializing count and count column management member fields: countColumns = groupBy.countColumns(this); final int columnCount = countColumns.columnCount(); counts = new int[columnCount][targetCollection.targetCount()]; // Open output files and write headers: outputWriter = openOutputWriter( output, composeMatrixOutputHeader( getCommandLine(), targetOutInfo, groupBy, countColumns.columnNames())); if (columnSummaryOutput != null) { columnSummaryOutputWriter = openOutputWriter( columnSummaryOutput, composeColumnSummaryHeader( getCommandLine(), groupBy, targetCollection.targetCount(), targetCollection.totalSize())); } if (rowSummaryOutput != null) { rowSummaryOutputWriter = openOutputWriter( rowSummaryOutput, composeRowOutputHeader( getCommandLine(), targetOutInfo, groupBy, countColumns.columnCount())); } // Next we start the traversal: logger.log(Level.INFO, "Collecting read counts ..."); }
@Override public void apply( final GATKRead read, final ReferenceContext referenceContext, final FeatureContext featureContext) { final SimpleInterval readLocation = referenceContext.getInterval(); final int columnIndex = countColumns.columnIndex(read); if (columnIndex >= 0) { // < 0 would means that the read is to be ignored. targetCollection.indexRange(readLocation).forEach(i -> counts[columnIndex][i]++); } }
/**
 * Composes the genomic-coordinate portion of a target's information string.
 *
 * <p>The result has three columns (contig, start, end) joined by {@code COLUMN_SEPARATOR}. When
 * the collection reports no location for the target, each of the three columns holds {@code
 * NO_VALUE_STRING} instead.
 *
 * @param index the index of a target within the collection.
 * @param collection the containing target collection.
 * @return never {@code null}.
 */
private static String coordinateComposer(
    final int index, final TargetCollection<Target> collection) {
  final SimpleInterval interval = collection.location(index);
  if (interval != null) {
    final String coordinateFormat = String.join(COLUMN_SEPARATOR, "%s", "%d", "%d");
    return String.format(
        coordinateFormat, interval.getContig(), interval.getStart(), interval.getEnd());
  }
  // No location available: emit the placeholder in all three coordinate columns.
  return String.join(COLUMN_SEPARATOR, NO_VALUE_STRING, NO_VALUE_STRING, NO_VALUE_STRING);
}
/**
 * Writes the column summary output table.
 *
 * <p>Does nothing when no column summary writer was opened. Otherwise one line is printed per
 * count column containing, separated by {@code COLUMN_SEPARATOR}: the column name, the sum of
 * that column's counts across all targets, and that sum divided by the target collection's
 * total size, formatted with {@code AVERAGE_DOUBLE_FORMAT}.
 */
private void writeColumnSummaryOutput() {
  if (columnSummaryOutputWriter == null) {
    return;
  }

  final long totalSize = targetCollection.totalSize();
  final List<String> columnNames = countColumns.columnNames();
  for (int i = 0; i < columnNames.size(); i++) {
    // Accumulate as long: IntStream.sum() adds in int arithmetic and would silently wrap
    // around once a column's total exceeds Integer.MAX_VALUE.
    final long sum = IntStream.of(counts[i]).asLongStream().sum();
    columnSummaryOutputWriter.println(
        String.join(
            COLUMN_SEPARATOR,
            columnNames.get(i),
            String.valueOf(sum),
            String.format(AVERAGE_DOUBLE_FORMAT, sum / (double) totalSize)));
  }
}
/**
 * Writes the collected counts once the traversal has finished: one call to {@code
 * writeOutputRows} per target, in index order, followed by the column summary.
 *
 * @return the string {@code "SUCCESS"}.
 */
@Override
public Object onTraversalSuccess() {
  logger.log(Level.INFO, "Collecting read counts done.");
  logger.log(Level.INFO, "Writing counts ...");

  final long[] columnTotals = calculateColumnTotals();
  final int targetCount = targetCollection.targetCount();
  for (int targetIndex = 0; targetIndex < targetCount; targetIndex++) {
    // The matrix is stored column-major (counts[column][target]); gather this target's
    // counts across all columns into a fresh row buffer.
    final int[] targetCounts = new int[counts.length];
    for (int column = 0; column < targetCounts.length; column++) {
      targetCounts[column] = counts[column][targetIndex];
    }
    writeOutputRows(targetCounts, columnTotals, targetIndex);
  }
  logger.log(Level.INFO, "Writing counts done.");

  writeColumnSummaryOutput();
  return "SUCCESS";
}
/**
 * Writes the row in the main matrix output file for a target and, if requested, the corresponding
 * row in the row summary output file.
 *
 * <p>Each count in the matrix row is passed through {@code transform} together with the total of
 * its column. The row summary line holds the target information, the sum of the raw counts and
 * that sum divided by (number of count columns x target size).
 *
 * @param countBuffer the counts for the target, one entry per count column.
 * @param columnTotals the per-column totals, indexed like {@code countBuffer}.
 * @param index the index of target within the target collection.
 */
private void writeOutputRows(
    final int[] countBuffer, final long[] columnTotals, final int index) {
  final String countString =
      IntStream.range(0, countBuffer.length)
          .mapToObj(i -> transform.apply(countBuffer[i], columnTotals[i]))
          .collect(Collectors.joining(COLUMN_SEPARATOR));
  final String targetInfoString =
      targetOutInfo.composeTargetOutInfoString(index, targetCollection);

  outputWriter.println(String.join(COLUMN_SEPARATOR, targetInfoString, countString));

  if (rowSummaryOutputWriter != null) {
    final long sum = MathUtils.sum(countBuffer);
    // NOTE(review): coordinateComposer treats a null location as possible; if that can happen
    // here the next line throws a NullPointerException -- confirm whether it needs handling.
    final SimpleInterval location = targetCollection.location(index);
    final int targetSize = location.size();
    // Divide in double precision: a float has a 24-bit significand, so large sums would be
    // rounded before the division. This also matches the column summary average.
    final double average = sum / ((double) countColumns.columnCount() * targetSize);
    rowSummaryOutputWriter.println(
        String.join(
            COLUMN_SEPARATOR,
            targetInfoString,
            Long.toString(sum),
            String.format(AVERAGE_DOUBLE_FORMAT, average)));
  }
}
/**
 * Composes the target information output string by delegating to this instance's {@code
 * composer}, after validating the arguments.
 *
 * @param index of the target in the collection; it is checked against the collection's target
 *     count via {@code Utils.validIndex}.
 * @param collection the target containing collection.
 * @return the string produced by {@code composer} for the given target.
 * @throws IllegalArgumentException if {@code collection} is {@code null}. An out-of-range
 *     {@code index} is presumably rejected as well; the exact exception type depends on {@code
 *     Utils.validIndex} (TODO confirm).
 */
protected String composeTargetOutInfoString(
    final int index, final TargetCollection<Target> collection) {
  Utils.nonNull(collection, "the collection cannot be null");
  final int targetCount = collection.targetCount();
  Utils.validIndex(index, targetCount);
  return composer.apply(index, collection);
}
/**
 * Composes the name portion of a target's information string.
 *
 * <p>Yields the target's name, or {@code NO_VALUE_STRING} when the target has no name ({@code
 * null}).
 *
 * @param index the target index.
 * @param collection the containing target collection.
 * @return never {@code null}.
 */
private static String nameComposer(final int index, final TargetCollection<Target> collection) {
  final String targetName = collection.target(index).getName();
  if (targetName != null) {
    return targetName;
  }
  return NO_VALUE_STRING;
}