/**
 * Computes the euclidean centroid of a set of PWMs, i.e. the element-wise average of their
 * column weights. It may not be the formal centroid for other metrics, but intuitively the
 * average counts should provide a good cluster representative, which is what this method
 * intends to return.
 *
 * @param pwmSet collection of PWMs from which to compute the centroid; must not be empty
 * @return the euclidean centroid, constructed with the label "centroid"
 * @throws IllegalArgumentException when {@code pwmSet} is empty or when not all PWMs have the
 *     same number of columns
 */
public PositionWeightMatrix centroidOf(Collection<PositionWeightMatrix> pwmSet)
        throws IllegalArgumentException {
    Iterator<PositionWeightMatrix> pwmIt = pwmSet.iterator();
    if (!pwmIt.hasNext()) {
        // Previously this case fell through to a NullPointerException on the null matrix.
        throw new IllegalArgumentException(
                "Error computing centroid. The set of PWMs must not be empty");
    }

    // The first PWM fixes the dimensions (alphabet size x number of columns) of the sum.
    PositionWeightMatrix first = pwmIt.next();
    PositionWeightColumn firstCol = first.get(0);
    Matrix centroidMatrix = new Matrix(firstCol.getAlphabetSize(), first.getNumCol());
    addToCentroid(centroidMatrix, first);

    while (pwmIt.hasNext()) {
        PositionWeightMatrix pwm = pwmIt.next();
        if (pwm.getNumCol() != centroidMatrix.getColumnDimension()) {
            throw new IllegalArgumentException(
                    "Error computing centroid. All PWMs in set should have the same dimension");
        }
        addToCentroid(centroidMatrix, pwm);
    }

    // Turn the accumulated sum into an average by scaling every entry in place.
    // NOTE(review): Matrix looks like Jama.Matrix, whose times(double) returns a NEW matrix and
    // leaves the receiver untouched; the former bare call "centroidMatrix.times(...)" would then
    // have discarded the scaled result. Scaling through get/set is correct either way - confirm.
    double scale = 1 / (double) pwmSet.size();
    for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
        for (int i = 0; i < centroidMatrix.getRowDimension(); i++) {
            centroidMatrix.set(i, j, centroidMatrix.get(i, j) * scale);
        }
    }

    PositionWeightMatrix centroid = new PositionWeightMatrix("centroid");
    for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
        centroid.addColumn(centroidMatrix.getColumn(j));
    }
    return centroid;
}
/**
 * Adds the weights of {@code pwm} element-wise into the accumulator matrix {@code m}.
 * Entry (row, column) of {@code m} is increased by the weight at index {@code row} of the
 * PWM column at index {@code column}.
 *
 * @param m accumulator matrix, modified in place; its dimensions drive the iteration
 * @param pwm position weight matrix whose column weights are added
 */
protected void addToCentroid(Matrix m, PositionWeightMatrix pwm) {
    int column = 0;
    while (column < m.getColumnDimension()) {
        PositionWeightColumn source = pwm.get(column);
        int row = 0;
        while (row < m.getRowDimension()) {
            double accumulated = m.get(row, column) + source.getWeight(row);
            m.set(row, column, accumulated);
            row++;
        }
        column++;
    }
}
/**
 * Computes the information content of this position weight matrix.
 *
 * @return the sum of {@code getInformationContent()} over every column of this matrix
 */
public double ic() {
    double total = 0;
    Iterator<PositionWeightColumn> columns = iterator();
    while (columns.hasNext()) {
        total += columns.next().getInformationContent();
    }
    return total;
}
/**
 * Collects the start indices of adjacent column pairs that look like a "GC" dinucleotide:
 * index {@code i} is reported when column {@code i} has a positive log ratio for 'G' and
 * column {@code i + 1} has a positive log ratio for 'C'.
 *
 * @return the matching first-column indices, in increasing order
 */
private List<Integer> gcDinucleotidesFistIdxs() {
    List<Integer> gcStarts = new ArrayList<Integer>();
    int idx = 0;
    while (idx < size() - 1) {
        PositionWeightColumn current = get(idx);
        PositionWeightColumn following = get(idx + 1);
        boolean favorsG = current.getLogRatio('G') > 0;
        // The 'C' check only runs when the 'G' check passed, as in a short-circuit AND.
        if (favorsG && following.getLogRatio('C') > 0) {
            gcStarts.add(idx);
        }
        idx++;
    }
    return gcStarts;
}
/**
 * Writes this matrix to {@code bw}: a header line consisting of ">" followed by the name,
 * then one line per alphabet index holding that index's weight from every column, formatted
 * with {@code formatter} and separated by tabs. Nothing but the header is written when the
 * matrix has no columns.
 *
 * @param bw writer receiving the output; it is neither flushed nor closed here
 * @param formatter number format applied to every weight
 * @throws IOException if writing to {@code bw} fails
 */
public void write(BufferedWriter bw, NumberFormat formatter) throws IOException {
    bw.write(">" + name);
    bw.newLine();
    if (size() <= 0) {
        return;
    }

    // The first column determines how many rows (alphabet symbols) are emitted.
    int alphabetSize = get(0).getAlphabetSize();
    for (int row = 0; row < alphabetSize; row++) {
        boolean needsSeparator = false;
        for (PositionWeightColumn column : this) {
            if (needsSeparator) {
                bw.write("\t");
            }
            bw.write(formatter.format(column.getWeight(row)));
            needsSeparator = true;
        }
        bw.newLine();
    }
}
/**
 * Invokes {@code addPseudoCounts()} on every column of this matrix, in iteration order.
 */
public void addPseudoCounts() {
    for (Iterator<PositionWeightColumn> columns = iterator(); columns.hasNext();) {
        PositionWeightColumn column = columns.next();
        column.addPseudoCounts();
    }
}