/**
 * Builds a new matrix, named after this one with a "_perm" suffix, that holds the columns of a
 * copy of this matrix in random order.
 *
 * @param preserveGCDinucleotides when true, the indices returned by
 *        {@code gcDinucleotidesFistIdxs()} are treated as the first column of a pair, and each
 *        such column is emitted immediately followed by the column after it
 * @return the permuted matrix
 */
public PositionWeightMatrix permuteColumns(boolean preserveGCDinucleotides) {
    PositionWeightMatrix source = copy();

    List<Integer> pairStarts;
    if (preserveGCDinucleotides) {
        pairStarts = source.gcDinucleotidesFistIdxs();
    } else {
        pairStarts = new ArrayList<Integer>();
    }

    PositionWeightMatrix shuffled = new PositionWeightMatrix(getName() + "_perm");
    Random rng = new Random();

    // Pool of column indices not yet emitted, kept in ascending order.
    int total = source.size();
    List<Integer> remaining = new ArrayList<Integer>(total);
    for (int k = 0; k < total; k++) {
        remaining.add(k);
    }

    while (!remaining.isEmpty()) {
        int slot = rng.nextInt(remaining.size());
        int picked = remaining.remove(slot);

        if (pairStarts.contains(picked - 1)) {
            // The drawn column is the second half of a pair: emit its predecessor first.
            shuffled.add(source.get(picked - 1));
            PositionWeightColumn second = source.get(picked);
            // Positional remove: the predecessor is expected to sit right before the drawn slot.
            // NOTE(review): presumably preserved pairs never overlap - verify against
            // gcDinucleotidesFistIdxs(); otherwise this slot may hold a different index.
            remaining.remove(slot - 1);
            shuffled.add(second);
            continue;
        }

        shuffled.add(source.get(picked));
        if (pairStarts.contains(picked)) {
            // The drawn column opens a pair: emit its successor right after it.
            PositionWeightColumn second = source.get(picked + 1);
            // After the removal above, the successor is expected to occupy the same slot.
            remaining.remove(slot);
            shuffled.add(second);
        }
    }
    return shuffled;
}
/**
 * Sums, column by column, the value of {@code PositionWeightColumn.kullbackLeiber} between this
 * matrix and another one. The column indices visited are those of {@code other}
 * (0 to {@code other.getNumCol() - 1}).
 *
 * @param other the matrix whose columns are compared against the columns of this matrix
 * @return the sum of the per-column values
 */
public double kullbackLeiber(PositionWeightMatrix other) {
    double total = 0;
    final int columns = other.getNumCol();
    int col = 0;
    while (col < columns) {
        PositionWeightColumn mine = get(col);
        total += mine.kullbackLeiber(other.get(col));
        col++;
    }
    return total;
}
/**
 * Computes the euclidean centroid of a set of PWMs. It may not be the formal centroid for
 * different metrics, but intuitively the average weights should provide a good cluster
 * representative, which is what this method intends to return.
 *
 * @param pwmSet - Collection of PWMs from which to compute the centroid; must not be empty
 * @return The euclidean centroid, as a matrix named "centroid"
 * @throws IllegalArgumentException - When the collection is empty or when not all PWMs have
 *         the same number of columns.
 */
public PositionWeightMatrix centroidOf(Collection<PositionWeightMatrix> pwmSet)
        throws IllegalArgumentException {
    Iterator<PositionWeightMatrix> pwmIt = pwmSet.iterator();
    if (!pwmIt.hasNext()) {
        // Previously an empty set fell through to a NullPointerException further down.
        throw new IllegalArgumentException(
                "Error computing centroid. The set of PWMs is empty");
    }

    // The first PWM fixes the dimensions: alphabet size (rows) by number of columns.
    PositionWeightMatrix first = pwmIt.next();
    PositionWeightColumn firstCol = first.get(0);
    Matrix centroidMatrix = new Matrix(firstCol.getAlphabetSize(), first.getNumCol());
    addToCentroid(centroidMatrix, first);

    while (pwmIt.hasNext()) {
        PositionWeightMatrix pwm = pwmIt.next();
        if (pwm.getNumCol() != centroidMatrix.getColumnDimension()) {
            throw new IllegalArgumentException(
                    "Error computing centroid. All PWMs in set should have the same dimension");
        }
        addToCentroid(centroidMatrix, pwm);
    }

    // Turn the accumulated sums into averages. The scaled matrix is taken from the return
    // value: Matrix.times(double) is assumed to return the product (as in JAMA) rather than
    // scale in place - verify against the Matrix class in use. Discarding the result, as the
    // code did before, would leave the sums unscaled in that case.
    centroidMatrix = centroidMatrix.times(1.0 / pwmSet.size());

    PositionWeightMatrix centroid = new PositionWeightMatrix("centroid");
    for (int j = 0; j < centroidMatrix.getColumnDimension(); j++) {
        centroid.addColumn(centroidMatrix.getColumn(j));
    }
    return centroid;
}
/**
 * Adds the weights of a PWM, element by element, into an accumulator matrix. Entry
 * (row, column) of {@code m} is increased by the weight at position {@code row} of column
 * {@code column} of {@code pwm}.
 *
 * @param m accumulator matrix, updated in place; its dimensions drive the iteration
 * @param pwm the matrix whose column weights are added
 */
protected void addToCentroid(Matrix m, PositionWeightMatrix pwm) {
    final int columnCount = m.getColumnDimension();
    final int rowCount = m.getRowDimension();
    for (int column = 0; column < columnCount; column++) {
        PositionWeightColumn weights = pwm.get(column);
        for (int row = 0; row < rowCount; row++) {
            double accumulated = m.get(row, column) + weights.getWeight(row);
            m.set(row, column, accumulated);
        }
    }
}