Пример #1
0
  /**
   * Loads the matrices V, D and VT from the decomposition and rescales V and VT column by column
   * (to avoid ugly decimals).
   *
   * <p>For each column the first non-zero entry, scanning from row 0, is taken as the reference.
   * That entry and every entry after it in the column are divided by the absolute value of the
   * reference, so the reference itself becomes +1 or -1. A column containing only zeros is left
   * unchanged. The scaling is applied through {@code setEntry} on the objects returned by {@code
   * ed}. D is stored as returned.
   *
   * @param ed decomposition supplying V, D and VT
   */
  private void makeVDVT(EigenDecomp ed) {
    V = ed.getV();
    D = ed.getD();
    VT = ed.getVT();

    // The outer loop walks columns and the inner loop walks rows. Each bound now matches the
    // index it guards (getEntry takes row first, column second), so a non-square matrix is
    // scaled correctly instead of being indexed out of range.
    for (int col = 0; col < V.getColumnDimension(); col++) {
      double ref = 0;
      for (int row = 0; row < V.getRowDimension(); row++) {
        if (V.getEntry(row, col) != 0 && ref == 0) {
          ref = V.getEntry(row, col);
        }
        if (ref != 0) {
          V.setEntry(row, col, V.getEntry(row, col) / Math.abs(ref));
        }
      }
    }

    for (int col = 0; col < VT.getColumnDimension(); col++) {
      double ref = 0;
      for (int row = 0; row < VT.getRowDimension(); row++) {
        if (VT.getEntry(row, col) != 0 && ref == 0) {
          ref = VT.getEntry(row, col);
        }
        if (ref != 0) {
          VT.setEntry(row, col, VT.getEntry(row, col) / Math.abs(ref));
        }
      }
    }
  }
Пример #2
0
 /**
  * Builds the matrix of pairwise Kendall's Tau rank correlations between the columns of the given
  * matrix.
  *
  * @param matrix matrix with columns representing variables to correlate
  * @return symmetric correlation matrix with ones on the diagonal
  */
 public RealMatrix computeCorrelationMatrix(final RealMatrix matrix) {
   final int variableCount = matrix.getColumnDimension();
   final RealMatrix correlations = new BlockRealMatrix(variableCount, variableCount);
   for (int row = 0; row < variableCount; row++) {
     // Only the strictly lower triangle is computed; each value is mirrored above the diagonal.
     int col = 0;
     while (col < row) {
       final double tau = correlation(matrix.getColumn(row), matrix.getColumn(col));
       correlations.setEntry(row, col, tau);
       correlations.setEntry(col, row, tau);
       col++;
     }
     correlations.setEntry(row, row, 1d);
   }
   return correlations;
 }
Пример #3
0
 /**
  * Builds the covariance matrix of the columns of the given matrix.
  *
  * <p>Off-diagonal entries come from {@code covariance(...)} applied to each pair of columns; the
  * diagonal holds the variance of each column, evaluated with the same bias-correction setting.
  *
  * @param matrix input matrix whose columns are the covariates (must have at least one column and
  *     two rows)
  * @param biasCorrected whether the covariance and variance estimates are bias-corrected
  * @return symmetric covariance matrix
  * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
  */
 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
     throws MathIllegalArgumentException {
   final int size = matrix.getColumnDimension();
   final Variance varianceEstimator = new Variance(biasCorrected);
   final RealMatrix covariances = new BlockRealMatrix(size, size);
   for (int row = 0; row < size; row++) {
     // Fill the strictly lower triangle and mirror it, then set the diagonal for this row.
     int col = 0;
     while (col < row) {
       final double pairCovariance =
           covariance(matrix.getColumn(row), matrix.getColumn(col), biasCorrected);
       covariances.setEntry(row, col, pairCovariance);
       covariances.setEntry(col, row, pairCovariance);
       col++;
     }
     covariances.setEntry(row, row, varianceEstimator.evaluate(matrix.getColumn(row)));
   }
   return covariances;
 }
Пример #4
0
 /**
  * Converts a covariance matrix into the corresponding correlation matrix.
  *
  * <p>Each off-diagonal entry is computed as <br>
  * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where <code>r(&middot;,&middot;)</code> is the
  * correlation coefficient and <code>s(&middot;)</code> is the standard deviation, taken as the
  * square root of the matching diagonal entry. Diagonal entries of the result are set to 1.
  *
  * @param covarianceMatrix the covariance matrix
  * @return correlation matrix
  */
 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
   final int variableCount = covarianceMatrix.getColumnDimension();
   final RealMatrix correlations = new BlockRealMatrix(variableCount, variableCount);
   for (int row = 0; row < variableCount; row++) {
     final double rowSigma = FastMath.sqrt(covarianceMatrix.getEntry(row, row));
     correlations.setEntry(row, row, 1d);
     // Lower triangle only; every value is mirrored across the diagonal.
     int col = 0;
     while (col < row) {
       final double colSigma = FastMath.sqrt(covarianceMatrix.getEntry(col, col));
       final double rho = covarianceMatrix.getEntry(row, col) / (rowSigma * colSigma);
       correlations.setEntry(row, col, rho);
       correlations.setEntry(col, row, rho);
       col++;
     }
   }
   return correlations;
 }
  /**
   * Assemble the matrix of normalized ratings. Every rating is stored after the baseline score
   * produced by {@code baselineScorer} for that user and item has been subtracted from it.
   *
   * @param userMapping The index mapping of user IDs to row numbers.
   * @param itemMapping The index mapping of item IDs to column numbers.
   * @return A matrix storing the <i>normalized</i> user ratings.
   */
  private RealMatrix createRatingMatrix(IdIndexMapping userMapping, IdIndexMapping itemMapping) {
    final int userCount = userMapping.size();
    final int itemCount = itemMapping.size();

    // One row per user, one column per item.
    logger.info("creating {} by {} rating matrix", userCount, itemCount);
    RealMatrix normalized = MatrixUtils.createRealMatrix(userCount, itemCount);

    // Stream the per-user histories; the cursor is closed even if a user fails to process.
    Cursor<UserHistory<Event>> histories = userEventDAO.streamEventsByUser();
    try {
      for (UserHistory<Event> history : histories) {
        int row = userMapping.getIndex(history.getUserId());
        MutableSparseVector userRatings = Ratings.userRatingVector(history.filter(Rating.class));

        // Score a baseline for exactly the items this user has rated.
        MutableSparseVector userBaselines = MutableSparseVector.create(userRatings.keySet());
        baselineScorer.score(history.getUserId(), userBaselines);

        // Write rating minus baseline into this user's row.
        for (VectorEntry rated : userRatings.fast(State.SET)) {
          long item = rated.getKey();
          int col = itemMapping.getIndex(item);
          double offset = rated.getValue() - userBaselines.get(item);
          normalized.setEntry(row, col, offset);
        }
      }
    } finally {
      histories.close();
    }

    return normalized;
  }
  /**
   * Calculates {@code P(D_n < d)} using the method described in [1], with the matrix arithmetic
   * carried out in doubles rather than exact fractions (see above).
   *
   * @param d statistic
   * @return the two-sided probability of {@code P(D_n < d)}
   * @throws MathArithmeticException if algorithm fails to convert {@code h} to a {@link
   *     org.apache.commons.math3.fraction.BigFraction} in expressing {@code d} as {@code (k - h) /
   *     m} for integer {@code k, m} and {@code 0 <= h < 1}.
   */
  private double roundedK(double d) throws MathArithmeticException {
    final int k = (int) FastMath.ceil(n * d);
    final FieldMatrix<BigFraction> exactH = this.createH(d);
    final int size = exactH.getRowDimension();

    // This is where the rounding happens: the exact fraction matrix is copied into a matrix of
    // doubles before it is raised to the n-th power.
    final RealMatrix roundedH = new Array2DRowRealMatrix(size, size);
    for (int row = 0; row < size; ++row) {
      for (int col = 0; col < size; ++col) {
        roundedH.setEntry(row, col, exactH.getEntry(row, col).doubleValue());
      }
    }

    final int diagonal = k - 1;
    double probability = roundedH.power(n).getEntry(diagonal, diagonal);

    // Scale by i / n for i = 1..n, one factor at a time.
    int factor = 1;
    while (factor <= n) {
      probability *= (double) factor / (double) n;
      ++factor;
    }

    return probability;
  }
  /**
   * Copies every entry of the given matrix into a new {@code Array2DRowRealMatrix} of the same
   * dimensions.
   *
   * @param m source matrix
   * @return a new matrix holding the same values at the same positions
   */
  public static RealMatrix getRealMatrixFromJamaMatrix(Matrix m) {
    final int rowCount = m.getRowDimension();
    final int colCount = m.getColumnDimension();
    final RealMatrix copy = new Array2DRowRealMatrix(rowCount, colCount);

    int row = 0;
    while (row < rowCount) {
      int col = 0;
      while (col < colCount) {
        copy.setEntry(row, col, m.get(row, col));
        col++;
      }
      row++;
    }
    return copy;
  }
 /**
  * Fits the coefficient vector {@code alpha} to the given instances by gradient ascent over a
  * precomputed Gram matrix.
  *
  * <p>Steps: determine the feature count from the largest feature key seen, copy features and
  * labels into dense arrays, evaluate {@code kernelFunction} on every pair of rows, then run
  * {@code _training_iterations} passes in which each coefficient is updated in turn. Progress is
  * printed to standard output.
  *
  * @param instances training instances; labels are cast to {@code ClassificationLabel}
  */
 @Override
 public void train(List<Instance> instances) {
   // ------------------------ determine matrix dimensions ---------------------
   final int sampleCount = instances.size();
   int featureCount = 0;
   for (Instance instance : instances) {
     int largestKey = Collections.max(instance.getFeatureVector().getFeatureMap().keySet());
     featureCount = Math.max(featureCount, largestKey);
   }

   // ------------------------ coefficients start at zero -----------------------
   alpha = new ArrayRealVector(sampleCount, 0);

   // ------------------------ dense copies of features and labels --------------
   double[][] features = new double[sampleCount][featureCount];
   double[] labels = new double[sampleCount];
   for (int row = 0; row < sampleCount; row++) {
     Instance instance = instances.get(row);
     labels[row] = ((ClassificationLabel) instance.getLabel()).getLabelValue();
     // Column c holds the feature stored under key c + 1.
     for (int col = 0; col < featureCount; col++) {
       features[row][col] = instance.getFeatureVector().get(col + 1);
     }
   }

   // ---------------------- Gram matrix of pairwise kernel values --------------
   matrixX = new Array2DRowRealMatrix(features);
   RealMatrix gram = new Array2DRowRealMatrix(sampleCount, sampleCount);
   for (int row = 0; row < sampleCount; row++) {
     for (int col = 0; col < sampleCount; col++) {
       double kernelValue = kernelFunction(matrixX.getRowVector(row), matrixX.getRowVector(col));
       gram.setEntry(row, col, kernelValue);
     }
   }

   // ---------------------- gradient ascent --------------------------
   Sigmoid sigmoid = new Sigmoid(); // helper function
   System.out.println("Training start...");
   System.out.println(
       "Learning rate: " + _learning_rate + " Training times: " + _training_iterations);
   for (int pass = 0; pass < _training_iterations; pass++) {
     System.out.println("Training iteration: " + (pass + 1));
     for (int target = 0; target < sampleCount; target++) {
       // Recomputed for every coefficient because alpha changes inside this loop.
       RealVector projected = gram.operate(alpha);
       double gradient = 0.0;
       for (int sample = 0; sample < sampleCount; sample++) {
         double lambda = projected.getEntry(sample);
         double kernel = gram.getEntry(sample, target);
         double positiveTerm = labels[sample] * sigmoid.value(-lambda) * kernel;
         double negativeTerm = (1 - labels[sample]) * sigmoid.value(lambda) * (-kernel);
         // Accumulate left to right to keep the original summation order.
         gradient = gradient + positiveTerm + negativeTerm;
       }
       alpha.setEntry(target, alpha.getEntry(target) + _learning_rate * gradient);
     }
   }
   System.out.println("Training done!");
 }
 /**
  * Standardizes every entry of the matrix with the per-column statistics held by the model: the
  * column's mean is subtracted and the difference is divided by the column's standard deviation.
  * The division is not guarded against a zero standard deviation.
  *
  * @param matrix raw data; column {@code j} is paired with {@code model.statistics()[j]}
  * @param model source of the per-column mean and standard deviation
  * @return a new matrix of the same dimensions holding the standardized values
  */
 private RealMatrix normalizeData(RealMatrix matrix, UserProfileEigenModel model) {
   RealMatrix standardized =
       new Array2DRowRealMatrix(matrix.getRowDimension(), matrix.getColumnDimension());
   if (LOG.isDebugEnabled()) {
     LOG.debug("model statistics size: " + model.statistics().length);
   }
   for (int row = 0; row < matrix.getRowDimension(); row++) {
     for (int col = 0; col < matrix.getColumnDimension(); col++) {
       double centered = matrix.getEntry(row, col) - model.statistics()[col].getMean();
       standardized.setEntry(row, col, centered / model.statistics()[col].getStddev());
     }
   }
   return standardized;
 }
Пример #10
0
 /**
  * Builds a smaller square matrix from {@code base} by dropping every row and every column whose
  * index appears in {@code zeroColumns}.
  *
  * <p>If removing the listed indices would leave nothing (row count equals the list size), the
  * original {@code base} matrix is returned unchanged.
  *
  * @param base matrix to compact; its row count determines the size of the result
  * @param zeroColumns indices to drop from both rows and columns
  * @return the compacted matrix, or {@code base} itself when nothing would remain
  */
 private RealMatrix removeZeroColumns(RealMatrix base, List<Integer> zeroColumns) {
   final int keptDim = base.getRowDimension() - zeroColumns.size();
   if (keptDim == 0) {
     return base;
   }
   RealMatrix compacted = new Array2DRowRealMatrix(keptDim, keptDim);
   int outRow = 0;
   for (int srcRow = 0; srcRow < base.getRowDimension(); srcRow++) {
     if (!zeroColumns.contains(srcRow)) {
       int outCol = 0;
       for (int srcCol = 0; srcCol < base.getColumnDimension(); srcCol++) {
         if (!zeroColumns.contains(srcCol)) {
           compacted.setEntry(outRow, outCol, base.getEntry(srcRow, srcCol));
           outCol++;
         }
       }
       outRow++;
     }
   }
   return compacted;
 }
Пример #11
0
  /**
   * Fills an input-by-output matrix with the value of {@code correlationRatio} for every pair of
   * encoded input and output components.
   *
   * @param it sample source; it is reset before the first sample is read and again before each
   *     pair is evaluated
   * @return matrix whose (i, j) entry is the result for input component i and output component j
   */
  @Override
  public RealMatrix computeIOCorrelationMatrix(SampleIterator it) {
    // The first sample is read only to learn the encoded input and output sizes.
    it.reset();
    Sample first = it.next();
    final int inputCount = first.getEncodedInput().getDimension();
    final int outputCount = first.getEncodedOutput().getDimension();

    RealMatrix ratios = new Array2DRowRealMatrix(inputCount, outputCount);

    for (int in = 0; in < inputCount; in++) {
      int out = 0;
      while (out < outputCount) {
        // Rewind so each pair sees the samples from the beginning.
        it.reset();
        ratios.setEntry(in, out, correlationRatio(it, in, out));
        out++;
      }
    }

    return ratios;
  }
    /**
     * Evaluates the objective at the given parameter vector.
     *
     * <p>The diagonal of {@code R} is overwritten with {@code 1 - param[i]}, {@code R} is
     * eigen-decomposed, and a matrix is rebuilt from the first {@code nFactors} eigenvectors
     * scaled by the square roots of their eigenvalues. The squared entrywise differences between
     * {@code R} and that matrix are then multiplied on both sides by a diagonal matrix built from
     * the reciprocals of the diagonal of {@code Sinv}, and all entries are summed.
     *
     * @param param one value per variable; only the first {@code nVariables} entries are read
     * @return the sum of the weighted squared differences
     */
    public double valueAt(double[] param) {
      final double[] inverseScale = new double[nVariables];
      for (int v = 0; v < nVariables; v++) {
        R.setEntry(v, v, 1.0 - param[v]);
        inverseScale[v] = 1.0 / Sinv.getEntry(v, v);
      }

      final DiagonalMatrix weights = new DiagonalMatrix(inverseScale);

      final EigenDecomposition decomposition = new EigenDecomposition(R);
      final RealMatrix leadingVectors =
          decomposition.getV().getSubMatrix(0, nVariables - 1, 0, nFactors - 1);

      final double[] rootEigenvalues = new double[nFactors];
      for (int f = 0; f < nFactors; f++) {
        rootEigenvalues[f] = Math.sqrt(decomposition.getRealEigenvalue(f));
      }
      // USE Apache version of Diagonal matrix when upgrade to version 3.2
      final DiagonalMatrix rootEigenvalueMatrix = new DiagonalMatrix(rootEigenvalues);
      final RealMatrix loadings = leadingVectors.multiply(rootEigenvalueMatrix);
      final RealMatrix reproduced = loadings.multiply(loadings.transpose());

      // Square every entry of the difference in place.
      final RealMatrix squaredDifference = R.subtract(reproduced);
      for (int row = 0; row < squaredDifference.getRowDimension(); row++) {
        for (int col = 0; col < squaredDifference.getColumnDimension(); col++) {
          final double difference = squaredDifference.getEntry(row, col);
          squaredDifference.setEntry(row, col, Math.pow(difference, 2));
        }
      }

      final RealMatrix weighted = weights.multiply(squaredDifference).multiply(weights);

      double total = 0.0;
      for (int row = 0; row < weighted.getRowDimension(); row++) {
        for (int col = 0; col < weighted.getColumnDimension(); col++) {
          total += weighted.getEntry(row, col);
        }
      }
      return total;
    }
Пример #13
0
 /**
  * Checks {@code Graphulo.nmfError_Client} on two 3x5 matrices while entries are filled in step
  * by step.
  *
  * <p>NOTE(review): the expected values depend on how {@code nmfError_Client} is defined, which
  * is not visible here; they are reproduced unchanged.
  */
 @Test
 public void testSubtractMatrixNorm() {
   final double tolerance = 0.000001;
   RealMatrix actual = MatrixUtils.createRealMatrix(3, 5);
   RealMatrix approximation = MatrixUtils.createRealMatrix(3, 5);

   // Both matrices have a non-zero entry at the same position.
   actual.setEntry(1, 2, 0.1613351);
   approximation.setEntry(1, 2, 0.486433333333);
   Assert.assertEquals(0.0, Graphulo.nmfError_Client(actual, approximation), tolerance);

   // A second shared position is filled in.
   actual.setEntry(2, 2, 0.76);
   approximation.setEntry(2, 2, 0.25);
   Assert.assertEquals(0.0, Graphulo.nmfError_Client(actual, approximation), tolerance);

   // Now each matrix gets an entry at a position the other leaves at zero.
   actual.setEntry(2, 1, 0.4);
   approximation.setEntry(2, 0, 0.9);
   Assert.assertEquals(2.0 / 3.0, Graphulo.nmfError_Client(actual, approximation), tolerance);
 }
  /**
   * Scores the rows of a user's activity matrix against an eigen-decomposition profile and
   * collects the outcome as callback results.
   *
   * <p>The method works in three passes over the normalized input:
   *
   * <ol>
   *   <li>For every row and every column flagged as low variant, the normalized value is compared
   *       with that column's mean; a larger value marks the row as a low-variant anomaly.
   *   <li>A reduced matrix is built that keeps only the columns not flagged as low variant.
   *   <li>Every row that was not marked in the first pass is projected onto each principal
   *       component; if the distance between the projection and the component exceeds the
   *       model's maximum L2 norm for that component, the row is reported as an anomaly.
   * </ol>
   *
   * @param user user identifier; stored in the result context and as the result id
   * @param algorithm algorithm tag stored in the result context
   * @param userActivity supplies the input matrix and the timestamp used in the first pass
   * @param aModel profile supplying statistics, principal components and maximum L2 norms; may be
   *     {@code null}
   * @return the collected results, or {@code null} when {@code aModel} is {@code null}
   */
  @Override
  public List<MLCallbackResult> detect(
      final String user,
      final String algorithm,
      UserActivityAggModel userActivity,
      UserProfileEigenModel aModel) {
    RealMatrix inputData = userActivity.matrix();
    // NOTE(review): this informational message is logged at WARN level.
    LOG.warn(
        "EigenBasedAnomalyDetection predictAnomaly called with dimension: "
            + inputData.getRowDimension()
            + "x"
            + inputData.getColumnDimension());

    // Without a model there is nothing to compare against; callers receive null, not an empty
    // list.
    if (aModel == null) {
      LOG.warn(
          "nothing to do as the input model does not have required values, returning from evaluating this algorithm..");
      return null;
    }

    List<MLCallbackResult> mlCallbackResults = new ArrayList<MLCallbackResult>();
    RealMatrix normalizedMat = normalizeData(inputData, aModel);

    // Count the columns that are NOT low variant; this is the width of the reduced matrix built
    // further down.
    UserCommandStatistics[] listStats = aModel.statistics();
    int colWithHighVariant = 0;

    for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
      if (listStats[j].isLowVariant() == false) {
        colWithHighVariant++;
      }
    }

    // Context shared by every result produced in this call (anonymous HashMap subclass with an
    // instance initializer).
    final Map<String, String> context =
        new HashMap<String, String>() {
          {
            put(UserProfileConstants.USER_TAG, user);
            put(UserProfileConstants.ALGORITHM_TAG, algorithm);
          }
        };

    // Pass 1: low-variant columns. Rows flagged here are remembered by index so that pass 3
    // skips them.
    Map<Integer, String> lineNoWithVariantBasedAnomalyDetection = new HashMap<Integer, String>();
    for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
      // One result object per row. NOTE(review): it is added to the list once for every
      // low-variant column, so the same instance can appear several times and later columns
      // overwrite the fields set by earlier ones - confirm this is intended.
      MLCallbackResult aResult = new MLCallbackResult();
      aResult.setContext(context);

      for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
        // LOG.info("mean for j=" + j + " is:" + listStats[j].getMean());
        // LOG.info("stddev for j=" + j + " is:" + listStats[j].getStddev());
        if (listStats[j].isLowVariant() == true) {
          // LOG.info(listOfCmds[j] + " is low variant");
          // NOTE(review): the normalized value is compared with the column mean from the model
          // statistics - verify that comparing a normalized value with this mean is intended.
          if (normalizedMat.getEntry(i, j) > listStats[j].getMean()) {
            lineNoWithVariantBasedAnomalyDetection.put(i, "lowVariantAnomaly");
            aResult.setAnomaly(true);
            aResult.setTimestamp(userActivity.timestamp());
            aResult.setFeature(listStats[j].getCommandName());
            aResult.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
            // Data points are taken from the raw (un-normalized) input row.
            List<String> datapoints = new ArrayList<String>();
            double[] rowVals = inputData.getRow(i);
            for (double rowVal : rowVals) datapoints.add(rowVal + "");
            aResult.setDatapoints(datapoints);
            aResult.setId(user);
            mlCallbackResults.add(aResult);
          } else {
            aResult.setAnomaly(false);
            aResult.setTimestamp(userActivity.timestamp());
            mlCallbackResults.add(aResult);
          }
        }
      }
      // return results;
    }

    // LOG.info("results size here: " + results.length);

    // LOG.info("col with high variant: " + colWithHighVariant);
    // Pass 2: copy the columns that are not low variant into a reduced matrix, keeping the row
    // order of the normalized input.
    RealMatrix finalMatWithoutLowVariantFeatures =
        new Array2DRowRealMatrix(normalizedMat.getRowDimension(), colWithHighVariant);
    // LOG.info("size of final test data: " + finalMatWithoutLowVariantFeatures.getRowDimension()
    // +"x"+ finalMatWithoutLowVariantFeatures.getColumnDimension());
    int finalMatrixRow = 0;
    int finalMatrixCol = 0;
    for (int i = 0; i < normalizedMat.getRowDimension(); i++) {
      for (int j = 0; j < normalizedMat.getColumnDimension(); j++) {
        if (listStats[j].isLowVariant() == false) {
          finalMatWithoutLowVariantFeatures.setEntry(
              finalMatrixRow, finalMatrixCol, normalizedMat.getEntry(i, j));
          finalMatrixCol++;
        }
      }
      // Start the next output row at its first column.
      finalMatrixCol = 0;
      finalMatrixRow++;
    }
    RealVector[] pcs = aModel.principalComponents();
    // LOG.info("pc size: " + pcs.getRowDimension() +"x" + pcs.getColumnDimension());

    // Transposed so that each original row can be fetched as a column matrix below.
    RealMatrix finalInputMatTranspose = finalMatWithoutLowVariantFeatures.transpose();

    // Pass 3: principal-component check for every row that pass 1 did not flag.
    for (int i = 0; i < finalMatWithoutLowVariantFeatures.getRowDimension(); i++) {
      if (lineNoWithVariantBasedAnomalyDetection.get(i) == null) {
        MLCallbackResult result = new MLCallbackResult();
        result.setContext(context);
        for (int sz = 0; sz < pcs.length; sz++) {
          // Project the row onto this component: (pc * pc^T) * row^T.
          double[] pc1 = pcs[sz].toArray();
          RealMatrix pc1Mat = new Array2DRowRealMatrix(pc1);
          RealMatrix transposePC1Mat = pc1Mat.transpose();
          RealMatrix testData =
              pc1Mat.multiply(transposePC1Mat).multiply(finalInputMatTranspose.getColumnMatrix(i));
          // LOG.info("testData size: " + testData.getRowDimension() + "x" +
          // testData.getColumnDimension());
          RealMatrix testDataTranspose = testData.transpose();
          // LOG.info("testData transpose size: " + testDataTranspose.getRowDimension() + "x" +
          // testDataTranspose.getColumnDimension());
          RealVector iRowVector = testDataTranspose.getRowVector(0);
          // RealVector pc1Vector = transposePC1Mat.getRowVector(sz);
          RealVector pc1Vector = transposePC1Mat.getRowVector(0);
          // Distance between the projected row and the component itself.
          double distanceiRowAndPC1 = iRowVector.getDistance(pc1Vector);
          // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " +
          // model.getMaxL2Norm().getEntry(sz));
          // LOG.info("model.getMaxL2Norm().getEntry(sz):" + model.getMaxL2Norm().getEntry(sz));
          if (distanceiRowAndPC1 > aModel.maximumL2Norm().getEntry(sz)) {
            // LOG.info("distance from pc sz: " + sz + " " + distanceiRowAndPC1 + " " +
            // model.getMaxL2Norm().getEntry(sz));
            result.setAnomaly(true);
            // NOTE(review): the feature name is looked up by the component index sz in the
            // statistics array, which is otherwise indexed by input column - confirm the two
            // index spaces line up.
            result.setFeature(aModel.statistics()[sz].getCommandName());
            // Unlike pass 1, the timestamp here is the current wall-clock time.
            result.setTimestamp(System.currentTimeMillis());
            result.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
            List<String> datapoints = new ArrayList<String>();
            double[] rowVals = inputData.getRow(i);
            for (double rowVal : rowVals) datapoints.add(rowVal + "");
            result.setDatapoints(datapoints);
            result.setId(user);
          }
        }
        // The result is added once per unflagged row, whether or not any component marked it.
        mlCallbackResults.add(result);
      }
    }
    return mlCallbackResults;
  }