Beispiel #1
0
  /**
   * Round-trips a 2x2 matrix through {@code MemMatrixUtil}: build it from key/value entries,
   * invert it, and convert the inverse back into a key/value map.
   *
   * <p>The input [[4, 3], [1, 1]] has determinant 1, so its inverse is [[1, -3], [-1, 4]].
   */
  @Test
  public void testInverse2x2() {
    final double tolerance = 0.001;
    final double[][] sourceValues = {{4, 3}, {1, 1}};
    final double[][] inverseValues = {{1, -3}, {-1, 4}};

    // Key strings "1"/"2" correspond to matrix indices 0/1, as the entry assertions below show.
    final Map<Key, Value> source = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
    source.put(new Key("1", "", "1"), new Value("4".getBytes()));
    source.put(new Key("1", "", "2"), new Value("3".getBytes()));
    source.put(new Key("2", "", "1"), new Value("1".getBytes()));
    source.put(new Key("2", "", "2"), new Value("1".getBytes()));

    // Expected inverse in the same encoding; the space-padded strings are part of the fixture.
    final Map<Key, Value> expectedInverse = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
    expectedInverse.put(new Key("1", "", "1"), new Value("1 ".getBytes()));
    expectedInverse.put(new Key("1", "", "2"), new Value("-3".getBytes()));
    expectedInverse.put(new Key("2", "", "1"), new Value("-1".getBytes()));
    expectedInverse.put(new Key("2", "", "2"), new Value("4 ".getBytes()));

    // Step 1: the entries are assembled into the expected dense matrix.
    RealMatrix matrix = MemMatrixUtil.buildMatrix(source.entrySet().iterator(), 2);
    Assert.assertEquals(2, matrix.getRowDimension());
    Assert.assertEquals(2, matrix.getColumnDimension());
    for (int row = 0; row < 2; row++) {
      for (int col = 0; col < 2; col++) {
        Assert.assertEquals(sourceValues[row][col], matrix.getEntry(row, col), tolerance);
      }
    }

    // Step 2: inversion yields the analytically known inverse.
    matrix = MemMatrixUtil.doInverse(matrix, -1);
    Assert.assertEquals(2, matrix.getRowDimension());
    Assert.assertEquals(2, matrix.getColumnDimension());
    for (int row = 0; row < 2; row++) {
      for (int col = 0; col < 2; col++) {
        Assert.assertEquals(inverseValues[row][col], matrix.getEntry(row, col), tolerance);
      }
    }

    // Step 3: converting back to a map reproduces the expected entries.
    final SortedMap<Key, Value> roundTripped =
        MemMatrixUtil.matrixToMap(new TreeMap<Key, Value>(TestUtil.COMPARE_KEY_TO_COLQ), matrix);
    TestUtil.assertEqualDoubleMap(expectedInverse, roundTripped);
  }
  /**
   * Ranks every column of <code>matrix</code> with the current <code>rankingAlgorithm</code>.
   *
   * <p>When the algorithm is a {@link NaturalRanking} configured with {@link
   * NaNStrategy#REMOVED}, the positions reported by <code>getNaNPositions</code> for any column
   * are first removed from every column via <code>removeValues</code>, so the result can have
   * fewer rows than the input. Otherwise a copy of the input is ranked.
   *
   * @param matrix matrix to transform
   * @return a rank-transformed matrix
   */
  private RealMatrix rankTransform(final RealMatrix matrix) {
    final int columnCount = matrix.getColumnDimension();
    final boolean dropNaNs =
        rankingAlgorithm instanceof NaturalRanking
            && ((NaturalRanking) rankingAlgorithm).getNanStrategy() == NaNStrategy.REMOVED;

    RealMatrix ranked = null;
    if (dropNaNs) {
      // Union of the NaN positions over all columns.
      final Set<Integer> positionsToDrop = new HashSet<Integer>();
      for (int col = 0; col < columnCount; col++) {
        positionsToDrop.addAll(getNaNPositions(matrix.getColumn(col)));
      }

      // Only build a smaller matrix when something actually has to be removed.
      if (!positionsToDrop.isEmpty()) {
        final int keptRows = matrix.getRowDimension() - positionsToDrop.size();
        ranked = new BlockRealMatrix(keptRows, columnCount);
        for (int col = 0; col < columnCount; col++) {
          ranked.setColumn(col, removeValues(matrix.getColumn(col), positionsToDrop));
        }
      }
    }

    // Nothing was removed: work on a copy so the caller's matrix is not modified.
    if (ranked == null) {
      ranked = matrix.copy();
    }

    for (int col = 0; col < columnCount; col++) {
      final double[] ranks = rankingAlgorithm.rank(ranked.getColumn(col));
      ranked.setColumn(col, ranks);
    }
    return ranked;
  }
 /**
  * Predicts a binary class label for {@code instance}.
  *
  * <p>The features of the instance (read at positions 1..n, where n is the column count of
  * {@code matrixX}) are compared against every row of {@code matrixX} with {@code
  * kernelFunction}; the results are weighted by {@code alpha} and summed. The sum is passed
  * through a {@code Sigmoid}, and the label is 0 when that value is below 0.5 and 1 otherwise.
  *
  * @param instance instance to classify; its label must be a {@code ClassificationLabel} or null
  * @return the predicted label, or {@code null} (after printing a message) for any other label
  *     type
  */
 @Override
 public Label predict(Instance instance) {
   if (!(instance.getLabel() instanceof ClassificationLabel || instance.getLabel() == null)) {
     System.out.println("label type error!");
     return null;
   }

   // Copy the instance's features into a dense vector; feature indices are offset by one.
   final int featureCount = matrixX.getColumnDimension();
   final RealVector features = new ArrayRealVector(featureCount, 0);
   for (int f = 0; f < featureCount; f++) {
     features.setEntry(f, instance.getFeatureVector().get(f + 1));
   }

   // Weighted sum of kernel evaluations against every stored row.
   double weightedSum = 0.0;
   final int weightCount = alpha.getDimension();
   for (int row = 0; row < weightCount; row++) {
     weightedSum += alpha.getEntry(row) * kernelFunction(matrixX.getRowVector(row), features);
   }

   // Squash with the sigmoid and threshold at 0.5.
   final double probability = new Sigmoid().value(weightedSum);
   return new ClassificationLabel(probability < 0.5 ? 0 : 1);
 }
  /**
   * Makes and scales the matrices V, D, and VT (to avoid ugly decimals).
   *
   * <p>V, D and VT are taken from {@code ed}. Every column of V and of VT is then divided by the
   * absolute value of its first non-zero entry; a column without any non-zero entry is left
   * untouched.
   *
   * @param ed decomposition that supplies V, D and VT
   */
  private void makeVDVT(EigenDecomp ed) {
    V = ed.getV();
    D = ed.getD();
    VT = ed.getVT();

    // Walk column by column. Each bound matches the index it guards (columns outer, rows inner),
    // so the scaling does not rely on the matrix being square.
    for (int col = 0; col < V.getColumnDimension(); col++) {
      double ref = 0; // first non-zero entry found so far in this column
      for (int row = 0; row < V.getRowDimension(); row++) {
        if (V.getEntry(row, col) != 0 && ref == 0) {
          ref = V.getEntry(row, col);
        }
        // Entries above the reference are zero, so scaling from here on covers the whole column.
        if (ref != 0) {
          V.setEntry(row, col, V.getEntry(row, col) / Math.abs(ref));
        }
      }
    }

    // Same column-wise scaling for VT.
    for (int col = 0; col < VT.getColumnDimension(); col++) {
      double ref = 0;
      for (int row = 0; row < VT.getRowDimension(); row++) {
        if (VT.getEntry(row, col) != 0 && ref == 0) {
          ref = VT.getEntry(row, col);
        }
        if (ref != 0) {
          VT.setEntry(row, col, VT.getEntry(row, col) / Math.abs(ref));
        }
      }
    }
  }
Beispiel #5
0
  /**
   * Checks that the 2x2 identity matrix survives build, inversion and conversion back to a map.
   *
   * <p>Only the two diagonal entries are supplied; the assertions confirm that the missing
   * off-diagonal entries read back as 0 and that the inverse equals the input.
   */
  @Test
  public void testInverseIdentity() {
    final double tolerance = 0.00001;
    final double[][] identity = {{1, 0}, {0, 1}};

    final Map<Key, Value> diagonalOnly = new TreeMap<>(TestUtil.COMPARE_KEY_TO_COLQ);
    diagonalOnly.put(new Key("1", "", "1"), new Value("1".getBytes()));
    diagonalOnly.put(new Key("2", "", "2"), new Value("1".getBytes()));

    // Building from the sparse entries gives the dense identity.
    RealMatrix matrix = MemMatrixUtil.buildMatrix(diagonalOnly.entrySet().iterator(), 2);
    Assert.assertEquals(2, matrix.getRowDimension());
    Assert.assertEquals(2, matrix.getColumnDimension());
    for (int row = 0; row < 2; row++) {
      for (int col = 0; col < 2; col++) {
        Assert.assertEquals(identity[row][col], matrix.getEntry(row, col), tolerance);
      }
    }

    // The identity is its own inverse.
    matrix = MemMatrixUtil.doInverse(matrix, -1);
    Assert.assertEquals(2, matrix.getRowDimension());
    Assert.assertEquals(2, matrix.getColumnDimension());
    for (int row = 0; row < 2; row++) {
      for (int col = 0; col < 2; col++) {
        Assert.assertEquals(identity[row][col], matrix.getEntry(row, col), tolerance);
      }
    }

    // Converting back must match the original entries.
    final SortedMap<Key, Value> roundTripped =
        MemMatrixUtil.matrixToMap(new TreeMap<Key, Value>(TestUtil.COMPARE_KEY_TO_COLQ), matrix);
    TestUtil.assertEqualDoubleMap(diagonalOnly, roundTripped);
  }
Beispiel #6
0
  /**
   * @param weight Weight matrix.
   * @throws NonSquareMatrixException if the argument is not a square matrix.
   */
  public Weight(RealMatrix weight) {
    if (weight.getColumnDimension() != weight.getRowDimension()) {
      throw new NonSquareMatrixException(weight.getColumnDimension(), weight.getRowDimension());
    }

    weightMatrix = weight.copy();
  }
 /**
  * Standardizes every entry of {@code matrix} column-wise using the model's statistics.
  *
  * <p>Entry (i, j) becomes {@code (x - mean_j) / stddev_j}, where the mean and standard deviation
  * are read from {@code model.statistics()[j]}. No guard is applied for a zero standard
  * deviation. NOTE(review): with double arithmetic that would produce NaN or an infinity —
  * confirm the statistics can never report 0.
  *
  * @param matrix data whose columns line up with the entries of {@code model.statistics()}
  * @param model model supplying one statistics entry per column
  * @return a new matrix of the same dimensions holding the standardized values
  */
 private RealMatrix normalizeData(RealMatrix matrix, UserProfileEigenModel model) {
   final int rowCount = matrix.getRowDimension();
   final int columnCount = matrix.getColumnDimension();
   RealMatrix normalizedData = new Array2DRowRealMatrix(rowCount, columnCount);
   if (LOG.isDebugEnabled()) {
     LOG.debug("model statistics size: " + model.statistics().length);
   }
   // The mean and standard deviation depend only on the column, so they are looked up once per
   // column instead of once per entry.
   for (int j = 0; j < columnCount; j++) {
     final double mean = model.statistics()[j].getMean();
     final double stddev = model.statistics()[j].getStddev();
     for (int i = 0; i < rowCount; i++) {
       normalizedData.setEntry(i, j, (matrix.getEntry(i, j) - mean) / stddev);
     }
   }
   return normalizedData;
 }
  /**
   * Computes the similarity of two documents given as single-column matrices.
   *
   * <p>The result is {@code dot(source, target) / (norm(source) * norm(target))}, i.e. the cosine
   * similarity provided the {@code dot} and {@code norm} helpers are the usual dot product and
   * vector norm.
   *
   * @param sourceDoc first document as a column matrix
   * @param targetDoc second document as a column matrix with the same number of rows
   * @return the ratio of the dot product to the product of the norms
   * @throws IllegalArgumentException if the matrices differ in size or are not single-column
   */
  public double computeSimilarity(RealMatrix sourceDoc, RealMatrix targetDoc) {
    final boolean sameRows = sourceDoc.getRowDimension() == targetDoc.getRowDimension();
    final boolean sameColumns = sourceDoc.getColumnDimension() == targetDoc.getColumnDimension();
    final boolean singleColumn = sourceDoc.getColumnDimension() == 1;
    if (!(sameRows && sameColumns && singleColumn)) {
      throw new IllegalArgumentException(
          "Matrices are not column matrices or not of the same size");
    }

    final double[] sourceVector = sourceDoc.getColumn(0);
    final double[] targetVector = targetDoc.getColumn(0);

    final double numerator = dot(sourceVector, targetVector);
    final double denominator = norm(sourceVector) * norm(targetVector);
    return numerator / denominator;
  }
Beispiel #9
0
 /**
  * Collects the indices of all columns of {@code base} whose vector norm is exactly zero.
  *
  * @param base matrix to scan
  * @return column indices in ascending order; empty when no column has a zero norm
  */
 private List<Integer> findZeroColumns(RealMatrix base) {
   final List<Integer> zeroColumns = new ArrayList<>();
   final int columnCount = base.getColumnDimension();
   for (int column = 0; column < columnCount; column++) {
     final double norm = base.getColumnVector(column).getNorm();
     if (norm == 0) {
       zeroColumns.add(column);
     }
   }
   return zeroColumns;
 }
Beispiel #10
0
  /**
   * Returns one randomly chosen mini-batch of consecutive rows from {@code data}.
   *
   * <p>The rows are split into consecutive batches of {@code batch_size} rows (the last batch may
   * be shorter). One batch index is drawn with {@code rng} and the rows of that batch are returned
   * with all columns.
   *
   * @param data matrix laid out as number_samples x number_features
   * @param batch_size number of rows per batch; must be positive
   * @param rng random generator used to pick the batch
   * @return sub-matrix holding the rows of the selected batch and every column of {@code data}
   */
  public static RealMatrix stochasticSubmatrix(RealMatrix data, int batch_size, Random rng) {
    // assume all data has the size number_samples by number_features
    int num_samples = data.getRowDimension();
    int num_features = data.getColumnDimension();

    // Ceiling division: "num_samples / batch_size + 1" would add an extra, empty batch whenever
    // num_samples is an exact multiple of batch_size, and drawing that batch would hand an empty
    // row selection to getSubMatrix.
    int batch_num = num_samples / batch_size + (num_samples % batch_size == 0 ? 0 : 1);

    // randomly generate a batch index
    int batch_index = rng.nextInt(batch_num);

    // Rows [firstRow, endRow) make up the batch; the last batch is clipped at num_samples.
    int firstRow = batch_size * batch_index;
    int endRow = Math.min(firstRow + batch_size, num_samples);
    int[] rowIndex = new int[endRow - firstRow];
    for (int i = 0; i < rowIndex.length; i++) {
      rowIndex[i] = firstRow + i;
    }

    // Select every column.
    int[] columnIndex = new int[num_features];
    for (int j = 0; j < num_features; j++) {
      columnIndex[j] = j;
    }

    return data.getSubMatrix(rowIndex, columnIndex);
  }
 public WeightedLeastSquaresMethod(RealMatrix R, int nFactors, RotationMethod rotationMethod) {
   this.nVariables = R.getColumnDimension();
   this.nParam = nVariables;
   this.nFactors = nFactors;
   this.rotationMethod = rotationMethod;
   this.R = R;
   this.R2 = R.copy();
 }
Beispiel #12
0
 /**
  * Verifies that the matrix has at least two rows and at least two columns.
  *
  * @param matrix matrix to check for sufficiency
  * @throws MathIllegalArgumentException if there is insufficient data
  */
 private void checkSufficientData(final RealMatrix matrix) {
   final int rowCount = matrix.getRowDimension();
   final int columnCount = matrix.getColumnDimension();
   final boolean sufficient = rowCount >= 2 && columnCount >= 2;
   if (sufficient) {
     return;
   }
   throw new MathIllegalArgumentException(
       LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, rowCount, columnCount);
 }
 /**
  * Adds up all entries of {@code matrix}, visiting them row by row.
  *
  * @param matrix matrix whose entries are summed
  * @return the sum of all entries
  */
 public double sumMatrix(RealMatrix matrix) {
   final int rowCount = matrix.getRowDimension();
   final int columnCount = matrix.getColumnDimension();
   double total = 0.0;
   for (int row = 0; row < rowCount; row++) {
     for (int col = 0; col < columnCount; col++) {
       total += matrix.getEntry(row, col);
     }
   }
   return total;
 }
 /**
  * Computes the element-wise differences {@code varcov - SIGMA}.
  *
  * @return an {@code nItems x nItems} array; positions covered by the dimensions of {@code
  *     SIGMA} hold the differences
  */
 public double[][] residuals() {
   final double[][] differences = new double[nItems][nItems];
   final int rowCount = SIGMA.getRowDimension();
   for (int row = 0; row < rowCount; row++) {
     final int columnCount = SIGMA.getColumnDimension();
     for (int col = 0; col < columnCount; col++) {
       final double observed = varcov.getEntry(row, col);
       final double implied = SIGMA.getEntry(row, col);
       differences[row][col] = observed - implied;
     }
   }
   return differences;
 }
    /**
     * Evaluates the objective at {@code param}.
     *
     * <p>The diagonal of {@code R} is overwritten with {@code 1 - param[i]} (so {@code R} is
     * modified by every call). The leading {@code nFactors} eigenvectors of the updated {@code R},
     * scaled by the square roots of their eigenvalues, give a matrix whose product with its own
     * transpose is subtracted from {@code R}. The squared differences are weighted on both sides
     * by the diagonal matrix of {@code 1 / Sinv[i][i]} and summed.
     *
     * @param param one value per variable; entry i is used as {@code 1 - param[i]} on the diagonal
     * @return the sum of the weighted squared differences
     */
    public double valueAt(double[] param) {
      // Update the diagonal of R and collect the weights in the same pass.
      final double[] weights = new double[nVariables];
      for (int v = 0; v < nVariables; v++) {
        R.setEntry(v, v, 1.0 - param[v]);
        weights[v] = 1.0 / Sinv.getEntry(v, v);
      }
      final DiagonalMatrix weightDiagonal = new DiagonalMatrix(weights);

      // Leading nFactors eigenvectors (as columns) of the updated R.
      final EigenDecomposition decomposition = new EigenDecomposition(R);
      final RealMatrix leadingVectors =
          decomposition.getV().getSubMatrix(0, nVariables - 1, 0, nFactors - 1);

      final double[] rootEigenvalues = new double[nFactors];
      for (int f = 0; f < nFactors; f++) {
        rootEigenvalues[f] = Math.sqrt(decomposition.getRealEigenvalue(f));
      }
      // USE Apache version of Diagonal matrix when upgrade to version 3.2
      final DiagonalMatrix rootDiagonal = new DiagonalMatrix(rootEigenvalues);
      final RealMatrix loadings = leadingVectors.multiply(rootDiagonal);
      final RealMatrix reproduced = loadings.multiply(loadings.transpose());

      // Square every element of R - reproduced in place.
      final RealMatrix squaredDiff = R.subtract(reproduced);
      final int diffRows = squaredDiff.getRowDimension();
      for (int row = 0; row < diffRows; row++) {
        final int diffCols = squaredDiff.getColumnDimension();
        for (int col = 0; col < diffCols; col++) {
          final double difference = squaredDiff.getEntry(row, col);
          squaredDiff.setEntry(row, col, Math.pow(difference, 2));
        }
      }

      final RealMatrix weighted = weightDiagonal.multiply(squaredDiff).multiply(weightDiagonal);

      // Total of all weighted squared differences, accumulated row by row.
      double total = 0.0;
      final int weightedRows = weighted.getRowDimension();
      for (int row = 0; row < weightedRows; row++) {
        final int weightedCols = weighted.getColumnDimension();
        for (int col = 0; col < weightedCols; col++) {
          total += weighted.getEntry(row, col);
        }
      }
      return total;
    }
 /**
  * Computes the element-wise squared differences {@code (varcov - SIGMA)^2}.
  *
  * @return an {@code nItems x nItems} array; positions covered by the dimensions of {@code
  *     SIGMA} hold the squared differences
  */
 public double[][] squaredResiduals() {
   final double[][] squared = new double[nItems][nItems];
   final int rowCount = SIGMA.getRowDimension();
   for (int row = 0; row < rowCount; row++) {
     final int columnCount = SIGMA.getColumnDimension();
     for (int col = 0; col < columnCount; col++) {
       final double difference = varcov.getEntry(row, col) - SIGMA.getEntry(row, col);
       squared[row][col] = difference * difference;
     }
   }
   return squared;
 }
 /**
  * Computes the sum of the squared differences between {@code varcov} and {@code SIGMA}, divided
  * by {@code nItems * nItems}.
  *
  * @return the mean squared residual
  */
 public double meanSquaredResidual() {
   final double itemCount = Double.valueOf(nItems).doubleValue();
   double sumOfSquares = 0.0;
   final int rowCount = SIGMA.getRowDimension();
   for (int row = 0; row < rowCount; row++) {
     final int columnCount = SIGMA.getColumnDimension();
     for (int col = 0; col < columnCount; col++) {
       final double difference = varcov.getEntry(row, col) - SIGMA.getEntry(row, col);
       sumOfSquares += difference * difference;
     }
   }
   final double cellCount = itemCount * itemCount;
   return sumOfSquares / cellCount;
 }
 /**
  * Adds up the squares of all entries of {@code matrix}, visiting them row by row.
  *
  * @param matrix matrix whose entries are squared and summed
  * @return the sum of the squared entries
  */
 public double sumSquaredElements(RealMatrix matrix) {
   final int rowCount = matrix.getRowDimension();
   final int columnCount = matrix.getColumnDimension();
   double total = 0.0;
   for (int row = 0; row < rowCount; row++) {
     for (int col = 0; col < columnCount; col++) {
       final double entry = matrix.getEntry(row, col);
       total += (entry * entry);
     }
   }
   return total;
 }
Beispiel #19
0
 /**
  * Returns the standard errors that belong to the entries of the correlation matrix: <code>
  * getCorrelationStandardErrors().getEntry(i,j)</code> is the standard error of <code>
  * getCorrelationMatrix.getEntry(i,j)</code>.
  *
  * <p>Each standard error is computed as <br>
  * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code> with <code>r
  * </code> the estimated correlation coefficient and <code>n</code> the number of observations
  * in the source dataset.
  *
  * <p>This method requires an instance created by one of the constructors that supply an input
  * matrix.
  *
  * @return matrix of correlation standard errors
  * @throws NullPointerException if this instance was created with no data
  */
 public RealMatrix getCorrelationStandardErrors() {
   final int size = correlationMatrix.getColumnDimension();
   final double[][] standardErrors = new double[size][size];
   for (int row = 0; row < size; row++) {
     final double[] errorRow = standardErrors[row];
     for (int col = 0; col < size; col++) {
       final double r = correlationMatrix.getEntry(row, col);
       errorRow[col] = FastMath.sqrt((1 - r * r) / (nObs - 2));
     }
   }
   return new BlockRealMatrix(standardErrors);
 }
Beispiel #20
0
 /**
  * Builds the Kendall's Tau rank correlation matrix of the columns of the input matrix.
  *
  * <p>Only the pairs below the diagonal are computed; each value is mirrored to the matching
  * position above the diagonal, and the diagonal itself is set to 1.
  *
  * @param matrix matrix with columns representing variables to correlate
  * @return correlation matrix
  */
 public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
   final int variableCount = matrix.getColumnDimension();
   final RealMatrix correlations = new BlockRealMatrix(variableCount, variableCount);
   for (int first = 0; first < variableCount; first++) {
     int second = 0;
     while (second < first) {
       final double tau = correlation(matrix.getColumn(first), matrix.getColumn(second));
       correlations.setEntry(first, second, tau);
       correlations.setEntry(second, first, tau);
       second++;
     }
     correlations.setEntry(first, first, 1d);
   }
   return correlations;
 }
 /**
  * Computes, for every column of {@code matrix}, the reciprocal of the mean of its entries.
  *
  * @param matrix matrix whose columns are averaged
  * @return one value per column: {@code 1.0 / mean(column)}
  * @throws IllegalArgumentException if a column's average cannot be computed because the stream
  *     of its entries is empty
  */
 private static double[] calculateColumnInverseMeans(RealMatrix matrix) {
   final int columnCount = matrix.getColumnDimension();
   final double[] inverseMeans = new double[columnCount];
   for (int i = 0; i < columnCount; i++) {
     final int column = i; // lambdas below need an effectively final copy
     final double mean =
         IntStream.range(0, matrix.getRowDimension())
             .mapToDouble(row -> matrix.getEntry(row, column))
             .average()
             .orElseThrow(
                 () ->
                     new IllegalArgumentException(
                         "cannot calculate a average for column " + column));
     inverseMeans[column] = 1.0 / mean;
   }
   return inverseMeans;
 }
Beispiel #22
0
 /**
  * Builds the covariance matrix of the columns (covariates) of the input matrix.
  *
  * <p>Covariances are computed for the pairs below the diagonal and mirrored above it; the
  * diagonal holds each column's variance.
  *
  * @param matrix input matrix (must have at least one column and two rows)
  * @param biasCorrected determines whether or not covariance estimates are bias-corrected
  * @return covariance matrix
  * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  */
 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
     throws MathIllegalArgumentException {
   final int size = matrix.getColumnDimension();
   final Variance varianceStatistic = new Variance(biasCorrected);
   final RealMatrix covariances = new BlockRealMatrix(size, size);
   for (int first = 0; first < size; first++) {
     int second = 0;
     while (second < first) {
       final double pairCovariance =
           covariance(matrix.getColumn(first), matrix.getColumn(second), biasCorrected);
       covariances.setEntry(first, second, pairCovariance);
       covariances.setEntry(second, first, pairCovariance);
       second++;
     }
     final double columnVariance = varianceStatistic.evaluate(matrix.getColumn(first));
     covariances.setEntry(first, first, columnVariance);
   }
   return covariances;
 }
Beispiel #23
0
 /**
  * Copies {@code base} into a smaller square matrix that omits every row and every column whose
  * index is listed in {@code zeroColumns}.
  *
  * @param base matrix to compact; its row dimension is used as the original size
  * @param zeroColumns indices to drop from both rows and columns
  * @return the compacted matrix, or {@code base} itself when nothing would remain
  */
 private RealMatrix removeZeroColumns(RealMatrix base, List<Integer> zeroColumns) {
   final int keptDim = base.getRowDimension() - zeroColumns.size();
   if (keptDim == 0) {
     return base;
   }

   final RealMatrix compacted = new Array2DRowRealMatrix(keptDim, keptDim);
   int targetRow = 0;
   for (int sourceRow = 0; sourceRow < base.getRowDimension(); sourceRow++) {
     if (!zeroColumns.contains(sourceRow)) {
       int targetCol = 0;
       for (int sourceCol = 0; sourceCol < base.getColumnDimension(); sourceCol++) {
         if (!zeroColumns.contains(sourceCol)) {
           compacted.setEntry(targetRow, targetCol, base.getEntry(sourceRow, sourceCol));
           targetCol++;
         }
       }
       targetRow++;
     }
   }
   return compacted;
 }
Beispiel #24
0
 /**
  * Converts a covariance matrix into the corresponding correlation matrix.
  *
  * <p>Applies <br>
  * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where <code>r(&middot;,&middot;)</code> is the
  * correlation coefficient and <code>s(&middot;)</code> means standard deviation. The diagonal
  * of the result is set to 1.
  *
  * @param covarianceMatrix the covariance matrix
  * @return correlation matrix
  */
 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
   final int size = covarianceMatrix.getColumnDimension();

   // Standard deviation of every variable: square root of its diagonal entry.
   final double[] sigmas = new double[size];
   for (int k = 0; k < size; k++) {
     sigmas[k] = FastMath.sqrt(covarianceMatrix.getEntry(k, k));
   }

   final RealMatrix correlations = new BlockRealMatrix(size, size);
   for (int row = 0; row < size; row++) {
     correlations.setEntry(row, row, 1d);
     for (int col = 0; col < row; col++) {
       final double r = covarianceMatrix.getEntry(row, col) / (sigmas[row] * sigmas[col]);
       correlations.setEntry(row, col, r);
       correlations.setEntry(col, row, r);
     }
   }
   return correlations;
 }
Beispiel #25
0
 /**
  * Returns the p-values for the (two-sided) null hypothesis that the corresponding correlation
  * coefficient is zero.
  *
  * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability that a random variable
  * distributed as <code>t<sub>n-2</sub></code> takes a value with absolute value greater than or
  * equal to <br>
  * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code>
  *
  * <p>These values are sometimes called the <i>significance</i> of the corresponding correlation
  * coefficients. Diagonal entries are 0.
  *
  * <p>This method requires an instance created by one of the constructors that supply an input
  * matrix.
  *
  * @return matrix of p-values
  * @throws org.apache.commons.math3.exception.MaxCountExceededException if an error occurs
  *     estimating probabilities
  * @throws NullPointerException if this instance was created with no data
  */
 public RealMatrix getCorrelationPValues() {
   final TDistribution studentT = new TDistribution(nObs - 2);
   final int size = correlationMatrix.getColumnDimension();
   final double[][] pValues = new double[size][size];
   for (int row = 0; row < size; row++) {
     for (int col = 0; col < size; col++) {
       if (row == col) {
         pValues[row][col] = 0d;
         continue;
       }
       final double r = correlationMatrix.getEntry(row, col);
       final double tStatistic = FastMath.abs(r * FastMath.sqrt((nObs - 2) / (1 - r * r)));
       // Two-sided: twice the lower-tail probability at -|t|.
       pValues[row][col] = 2 * studentT.cumulativeProbability(-tStatistic);
     }
   }
   return new BlockRealMatrix(pValues);
 }
  /**
   * Tangent normalize a coverage profile.
   *
   * <p>Notes about the Spark tangent normalization can be found in docs/PoN/
   *
   * <p>The case counts are first re-ordered to the PoN's target order and prepared with {@code
   * composeTangentNormalizationInputMatrix}. When {@code ctx} is {@code null} the beta-hats and
   * the normalization are computed through the {@code pon} object; otherwise the projection is
   * computed with Spark distributed matrices and subtracted from the input counts.
   *
   * @param pon Not {@code null}
   * @param targetFactorNormalizedCounts ReadCountCollection of counts that have already been
   *     normalized fully (typically, including the target factor normalization), i.e. a coverage
   *     profile. The column names should be intact. Not {@code null}. See {@code
   *     TangentNormalizer.createCoverageProfile}
   * @param ctx Spark context; {@code null} selects the non-Spark code path
   * @return never {@code null}
   */
  private static TangentNormalizationResult tangentNormalize(
      final PoN pon, final ReadCountCollection targetFactorNormalizedCounts, JavaSparkContext ctx) {

    // Argument validation: both inputs and a non-empty list of column names are required.
    Utils.nonNull(pon, "PoN cannot be null.");
    Utils.nonNull(targetFactorNormalizedCounts, "targetFactorNormalizedCounts cannot be null.");
    Utils.nonNull(
        targetFactorNormalizedCounts.columnNames(),
        "targetFactorNormalizedCounts column names cannot be null.");
    ParamUtils.isPositive(
        targetFactorNormalizedCounts.columnNames().size(),
        "targetFactorNormalizedCounts column names cannot be an empty list.");

    // Maps between the case's target order and the PoN's target order (both directions are used
    // below).
    final Case2PoNTargetMapper targetMapper =
        new Case2PoNTargetMapper(targetFactorNormalizedCounts.targets(), pon.getPanelTargetNames());

    // The input counts with rows (targets) sorted so that they match the PoN's order.
    final RealMatrix tangentNormalizationRawInputCounts =
        targetMapper.fromCaseToPoNCounts(targetFactorNormalizedCounts.counts());

    // We prepare the counts for tangent normalization.
    final RealMatrix tangentNormalizationInputCounts =
        composeTangentNormalizationInputMatrix(tangentNormalizationRawInputCounts);

    if (ctx == null) {
      // No Spark context: everything is delegated to the PoN object.

      // Calculate the beta-hats for the input read count columns (samples).
      logger.info("Calculating beta hats...");
      final RealMatrix tangentBetaHats =
          pon.betaHats(tangentNormalizationInputCounts, true, EPSILON);

      // Actual tangent normalization step.
      logger.info(
          "Performing actual tangent normalization ("
              + tangentNormalizationInputCounts.getColumnDimension()
              + " columns)...");
      final RealMatrix tangentNormalizedCounts =
          pon.tangentNormalization(tangentNormalizationInputCounts, tangentBetaHats, true);

      // Output the tangent normalized counts, mapped back to the case's target order. The
      // prepared (pre-normalization) input is returned alongside in the same order.
      logger.info("Post-processing tangent normalization results...");
      final ReadCountCollection tangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
      final ReadCountCollection preTangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());

      return new TangentNormalizationResult(
          tangentNormalized, preTangentNormalized, tangentBetaHats, targetFactorNormalizedCounts);

    } else {

      /*
      Using Spark:  the code here is a little more complex for optimization purposes.

      Please see notes in docs/PoN ...

      Ahat^T = (C^T P^T) A^T
      Therefore, C^T is the RowMatrix

      pinv: P
      panel: A
      projection: Ahat
      cases: C
      betahat: C^T P^T
      tangentNormalizedCounts: C - Ahat
       */
      final RealMatrix pinv = pon.getReducedPanelPInverseCounts();
      final RealMatrix panel = pon.getReducedPanelCounts();

      // Make the C^T a distributed matrix (RowMatrix)
      final RowMatrix caseTDistMat =
          SparkConverter.convertRealMatrixToSparkRowMatrix(
              ctx, tangentNormalizationInputCounts.transpose(), TN_NUM_SLICES_SPARK);

      // Spark local matrices (transposed): each is built from the flattened data of the Commons
      // Math matrix and then transposed, giving P^T and A^T.
      final Matrix pinvTLocalMat =
          new DenseMatrix(
                  pinv.getRowDimension(),
                  pinv.getColumnDimension(),
                  Doubles.concat(pinv.getData()),
                  true)
              .transpose();
      final Matrix panelTLocalMat =
          new DenseMatrix(
                  panel.getRowDimension(),
                  panel.getColumnDimension(),
                  Doubles.concat(panel.getData()),
                  true)
              .transpose();

      // Calculate the projection transpose in a distributed matrix, then convert to Apache Commons
      // matrix (not transposed)
      final RowMatrix betahatDistMat = caseTDistMat.multiply(pinvTLocalMat);
      final RowMatrix projectionTDistMat = betahatDistMat.multiply(panelTLocalMat);
      final RealMatrix projection =
          SparkConverter.convertSparkRowMatrixToRealMatrix(
                  projectionTDistMat, tangentNormalizationInputCounts.transpose().getRowDimension())
              .transpose();

      // Subtract the projection from the cases (C - Ahat)
      final RealMatrix tangentNormalizedCounts =
          tangentNormalizationInputCounts.subtract(projection);

      // Construct the result object and return it with the correct targets.
      final ReadCountCollection tangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizedCounts, targetFactorNormalizedCounts.columnNames());
      final ReadCountCollection preTangentNormalized =
          targetMapper.fromPoNtoCaseCountCollection(
              tangentNormalizationInputCounts, targetFactorNormalizedCounts.columnNames());
      // The distributed beta-hats are C^T P^T, so they are transposed when handed to the result.
      final RealMatrix tangentBetaHats =
          SparkConverter.convertSparkRowMatrixToRealMatrix(
              betahatDistMat, tangentNormalizedCounts.getColumnDimension());
      return new TangentNormalizationResult(
          tangentNormalized,
          preTangentNormalized,
          tangentBetaHats.transpose(),
          targetFactorNormalizedCounts);
    }
  }
Beispiel #27
0
  /////////////////////////////////// Constructor ////////////////////////////////////////////////
  /**
   * Builds the cost function: stores the inputs, loads eigenvalues and the landmark index from
   * binary files, validates the argument sizes, and pre-computes the derived quantities (I*
   * vectors, Sobel gradients of the model bitmap, E, alpha and beta).
   *
   * <p>The statement order matters: the {@code compute*} calls at the end are invoked only after
   * the fields above them have been assigned.
   *
   * @param input input pixels; must contain exactly 8489 entries
   * @param average average pixels; must have the same size as {@code input}
   * @param model model pixels; must have the same size as {@code input}
   * @param inputFeatPts feature points of the input; must have exactly 83 rows
   * @param averageFeatPts feature points of the average; same dimensions as {@code inputFeatPts}
   * @param modelFeatPts feature points of the model; same dimensions as {@code inputFeatPts}
   * @param eigenVectorsS stored as {@code s}
   * @param eigenVectorsT stored as {@code t}
   * @param bmpModel bitmap passed to the Sobel gradient computations
   * @param inputAlpha initial alpha values; also used (by reference) as the starting {@code alpha}
   * @param inputBeta initial beta values; also used (by reference) as the starting {@code beta}
   * @param sigmaI stored as {@code sigmaI}
   * @param sigmaF stored as {@code sigmaF}
   * @throws IllegalArgumentException if any of the size checks below fails
   */
  public CostFunction(
      List<Pixel> input,
      List<Pixel> average,
      List<Pixel> model,
      RealMatrix inputFeatPts,
      RealMatrix averageFeatPts,
      RealMatrix modelFeatPts,
      float[][] eigenVectorsS,
      float[][] eigenVectorsT,
      Bitmap bmpModel,
      float[] inputAlpha,
      float[] inputBeta,
      float sigmaI,
      float sigmaF) {

    // this.featPtsIndex = readBin83PtIndex(CONFIG_DIRECTORY, INDEX83PT_FILE);
    this.input = input;
    this.average = average;
    this.model = model;
    this.inputFeatPts = inputFeatPts;
    this.averageFeatPts = averageFeatPts;
    this.modelFeatPts = modelFeatPts;
    this.k = inputFeatPts.getRowDimension(); // should be equal to 83
    this.num_points = input.size(); // should be equal to 8489

    // NOTE(review): these file reads happen before the arguments are validated below.
    this.eigValS = readBinFloat(SHAPE_DIRECTORY, EIG_SHAPE_FILE, 60);
    this.eigValT = readBinFloat(TEXTURE_DIRECTORY, EIG_TEXTURE_FILE, 100);
    /*this.subFSV = readBinSFSV(CONFIG_DIRECTORY, SFSV_FILE);*/
    this.landmarks83Index =
        readBin83PtIndex(
            CONFIG_DIRECTORY, INDEX83PT_FILE); // we access directly in featureShape file
    // instead of using subFSV file

    // Checking arguments
    if (num_points != 8489) {
      throw new IllegalArgumentException("num_points not equal 8489");
    }
    if (k != 83) {
      throw new IllegalArgumentException("k not equal 83");
    }
    // NOTE(review): an empty input list is already rejected by the num_points check above, so
    // this check can only fire for an empty average or model list.
    if (input.isEmpty() || average.isEmpty() || model.isEmpty()) {
      throw new IllegalArgumentException("input or average or model list are empty");
    }
    if (input.size() != model.size()) {
      throw new IllegalArgumentException("input and model list do not have the same size");
    }
    if (input.size() != average.size()) {
      throw new IllegalArgumentException("input and average list do not have the same size");
    }
    // NOTE(review): implied by the two size checks above; cannot fail once they have passed.
    if (average.size() != model.size()) {
      throw new IllegalArgumentException("average and model list do not have the same size");
    }
    if (averageFeatPts.getRowDimension() != k
        || averageFeatPts.getColumnDimension() != inputFeatPts.getColumnDimension()) {
      throw new IllegalArgumentException(
          "inputFeatPts and averageFeatPts do not have the same size");
    }
    if (modelFeatPts.getRowDimension() != k
        || modelFeatPts.getColumnDimension() != inputFeatPts.getColumnDimension()) {
      throw new IllegalArgumentException("inputFeatPts and modelFeatPts do not have the same size");
    }

    // Initially populate the list with 500 zeros; the value doesn't matter
    for (int h = 0; h < 500; h++) {
      this.randomList.add(0);
    }

    this.s = eigenVectorsS;
    this.t = eigenVectorsT;

    this.inputAlpha = inputAlpha;
    this.inputBeta = inputBeta;

    this.sigmaI = sigmaI;
    this.sigmaF = sigmaF;

    this.Iinput = computeIinput(); // is always the same
    this.Imodel = computeImodel();

    this.IinputBeta = computeIinputBeta(); // is always the same
    this.ImodelBeta = computeImodelBeta();

    // Sobel gradients of the model bitmap.
    this.dxModel = computeSobelGx(bmpModel);
    this.dyModel = computeSobelGy(bmpModel);
    // saveBitmaptoPNG(TEXTURE_DIRECTORY, "modelFace2DGx.png", bmpModelGx); //save
    // saveBitmaptoPNG(TEXTURE_DIRECTORY, "modelFace2DGy.png", bmpModelGy); //save

    this.E = computeE();

    // alpha and beta start out as the very same arrays as inputAlpha / inputBeta (no copy is made).
    this.alpha = inputAlpha; // Initialize
    this.beta = inputBeta; // Initialize
    computeAlpha(); // Compute 60 alpha values output
    computeBeta(); // Compute 100 beta values output

    /*        Collections.sort(randomList);
    for(int idx : randomList) {
        Log.d(TAG,"randomList = " + idx);
    }*/
  }
  @Override
  public List<MLCallbackResult> detect(
      final String user,
      final String algorithm,
      UserActivityAggModel userActivity,
      UserProfileEigenModel aModel) {
    RealMatrix inputData = userActivity.matrix();
    LOG.warn(
        "EigenBasedAnomalyDetection predictAnomaly called with dimension: "
            + inputData.getRowDimension()
            + "x"
            + inputData.getColumnDimension());

    if (aModel == null) {
      LOG.warn(
          "nothing to do as the input model does not have required values, returning from evaluating this algorithm..");
      return null;
    }

    List<MLCallbackResult> mlCallbackResults = new ArrayList<MLCallbackResult>();
    RealMatrix normalizedMat = normalizeData(inputData, aModel);

    UserCommandStatistics[] listStats = aModel.statistics();
    int colWithHighVariant = 0;

    for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
      if (listStats[j].isLowVariant() == false) {
        colWithHighVariant++;
      }
    }

    final Map<String, String> context =
        new HashMap<String, String>() {
          {
            put(UserProfileConstants.USER_TAG, user);
            put(UserProfileConstants.ALGORITHM_TAG, algorithm);
          }
        };

    Map<Integer, String> lineNoWithVariantBasedAnomalyDetection = new HashMap<Integer, String>();
    for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
      MLCallbackResult aResult = new MLCallbackResult();
      aResult.setContext(context);

      for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
        // LOG.info("mean for j=" + j + " is:" + listStats[j].getMean());
        // LOG.info("stddev for j=" + j + " is:" + listStats[j].getStddev());
        if (listStats[j].isLowVariant() == true) {
          // LOG.info(listOfCmds[j] + " is low variant");
          if (normalizedMat.getEntry(i, j) > listStats[j].getMean()) {
            lineNoWithVariantBasedAnomalyDetection.put(i, "lowVariantAnomaly");
            aResult.setAnomaly(true);
            aResult.setTimestamp(userActivity.timestamp());
            aResult.setFeature(listStats[j].getCommandName());
            aResult.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
            List<String> datapoints = new ArrayList<String>();
            double[] rowVals = inputData.getRow(i);
            for (double rowVal : rowVals) datapoints.add(rowVal + "");
            aResult.setDatapoints(datapoints);
            aResult.setId(user);
            mlCallbackResults.add(aResult);
          } else {
            aResult.setAnomaly(false);
            aResult.setTimestamp(userActivity.timestamp());
            mlCallbackResults.add(aResult);
          }
        }
      }
      // return results;
    }

    // LOG.info("results size here: " + results.length);

    // LOG.info("col with high variant: " + colWithHighVariant);
    RealMatrix finalMatWithoutLowVariantFeatures =
        new Array2DRowRealMatrix(normalizedMat.getRowDimension(), colWithHighVariant);
    // LOG.info("size of final test data: " + finalMatWithoutLowVariantFeatures.getRowDimension()
    // +"x"+ finalMatWithoutLowVariantFeatures.getColumnDimension());
    int finalMatrixRow = 0;
    int finalMatrixCol = 0;
    for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
      for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
        if (listStats[j].isLowVariant() == false) {
          finalMatWithoutLowVariantFeatures.setEntry(
              finalMatrixRow, finalMatrixCol, normalizedMat.getEntry(i, j));
          finalMatrixCol++;
        }
      }
      finalMatrixCol = 0;
      finalMatrixRow++;
    }
    RealVector[] pcs = aModel.principalComponents();
    // LOG.info("pc size: " + pcs.getRowDimension() +"x" + pcs.getColumnDimension());

    RealMatrix finalInputMatTranspose = finalMatWithoutLowVariantFeatures.transpose();

    for (int i = 0; i < finalMatWithoutLowVariantFeatures.getRowDimension(); i++) {
      if (lineNoWithVariantBasedAnomalyDetection.get(i) == null) {
        MLCallbackResult result = new MLCallbackResult();
        result.setContext(context);
        for (int sz = 0; sz < pcs.length; sz++) {
          double[] pc1 = pcs[sz].toArray();
          RealMatrix pc1Mat = new Array2DRowRealMatrix(pc1);
          RealMatrix transposePC1Mat = pc1Mat.transpose();
          RealMatrix testData =
              pc1Mat.multiply(transposePC1Mat).multiply(finalInputMatTranspose.getColumnMatrix(i));
          // LOG.info("testData size: " + testData.getRowDimension() + "x" +
          // testData.getColumnDimension());
          RealMatrix testDataTranspose = testData.transpose();
          // LOG.info("testData transpose size: " + testDataTranspose.getRowDimension() + "x" +
          // testDataTranspose.getColumnDimension());
          RealVector iRowVector = testDataTranspose.getRowVector(0);
          // RealVector pc1Vector = transposePC1Mat.getRowVector(sz);
          RealVector pc1Vector = transposePC1Mat.getRowVector(0);
          double distanceiRowAndPC1 = iRowVector.getDistance(pc1Vector);
          // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " +
          // model.getMaxL2Norm().getEntry(sz));
          // LOG.info("model.getMaxL2Norm().getEntry(sz):" + model.getMaxL2Norm().getEntry(sz));
          if (distanceiRowAndPC1 > aModel.maximumL2Norm().getEntry(sz)) {
            // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " +
            // model.getMaxL2Norm().getEntry(sz));
            result.setAnomaly(true);
            result.setFeature(aModel.statistics()[sz].getCommandName());
            result.setTimestamp(System.currentTimeMillis());
            result.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
            List<String> datapoints = new ArrayList<String>();
            double[] rowVals = inputData.getRow(i);
            for (double rowVal : rowVals) datapoints.add(rowVal + "");
            result.setDatapoints(datapoints);
            result.setId(user);
          }
        }
        mlCallbackResults.add(result);
      }
    }
    return mlCallbackResults;
  }
 /**
  * Applies the rank transform to every column of <code>matrix</code>, in place.
  *
  * <p>Each column is read out, passed to <code>rankingAlgorithm.rank</code>, and the
  * returned array is written back into the same column. The supplied matrix is
  * modified directly; nothing is returned.
  *
  * @param matrix matrix whose columns are overwritten with their ranks
  */
 private void rankTransform(RealMatrix matrix) {
   for (int column = 0; column < matrix.getColumnDimension(); column++) {
     // Read the column first, rank it, then store the ranks back at the same index.
     final double[] columnValues = matrix.getColumn(column);
     final double[] columnRanks = rankingAlgorithm.rank(columnValues);
     matrix.setColumn(column, columnRanks);
   }
 }