Beispiel #1
0
  /**
   * Computes the Euclidean distance between two points given as coordinate arrays.
   *
   * <p>Both arrays are wrapped as {@code RealVector}s and the result of {@code getDistance} between
   * them is returned unchanged.
   *
   * @param a1 coordinates of the first point
   * @param a2 coordinates of the second point
   * @return the distance between the two points
   */
  public static double Euclidean(double[] a1, double[] a2) {
    final RealVector first = MatrixUtils.createRealVector(a1);
    return first.getDistance(MatrixUtils.createRealVector(a2));
  }
Beispiel #2
0
  /**
   * Returns the length of a vector, i.e. its Euclidean (L2) norm, which equals its distance from
   * the origin.
   *
   * <p>The norm is read directly from the vector instead of measuring the distance to an
   * explicitly built all-zero vector; this avoids allocating a scratch array and vector on every
   * call and also yields {@code 0} for a vector of dimension zero.
   *
   * @param v the vector to measure; must not be {@code null}
   * @return the L2 norm of {@code v}
   */
  public static double vectorLength(RealVector v) {
    return v.getNorm();
  }
  /**
   * Evaluates every row of the user's activity matrix against the eigen-decomposition profile and
   * collects the resulting {@code MLCallbackResult}s.
   *
   * <p>The normalized activity matrix is examined in two passes:
   *
   * <ol>
   *   <li><b>Low-variant pass</b>: for every column whose statistics are marked low-variant, a
   *       result is emitted for the row. If the normalized value exceeds that column's mean the
   *       result is an anomaly and the row is excluded from the second pass.
   *   <li><b>Principal-component pass</b>: for every row not flagged above, the high-variant
   *       columns are multiplied by {@code pc * pc^T} for each principal component. If the
   *       distance between that product and the component exceeds the model's maximum L2 norm for
   *       the component, the row's result is marked as an anomaly. One result per such row is
   *       emitted, anomalous or not.
   * </ol>
   *
   * <p>If the model has no high-variant column at all, only the results of the first pass are
   * returned.
   *
   * @param user identifier stored in the result context and used as the id of anomalous results
   * @param algorithm algorithm name stored in the result context
   * @param userActivity aggregated activity supplying the input matrix and the timestamp used by
   *     the low-variant pass
   * @param aModel profile supplying column statistics, principal components and maximum L2 norms
   * @return the collected results, or {@code null} when {@code aModel} is {@code null}
   */
  @Override
  public List<MLCallbackResult> detect(
      final String user,
      final String algorithm,
      UserActivityAggModel userActivity,
      UserProfileEigenModel aModel) {
    RealMatrix inputData = userActivity.matrix();
    LOG.warn(
        "EigenBasedAnomalyDetection predictAnomaly called with dimension: "
            + inputData.getRowDimension()
            + "x"
            + inputData.getColumnDimension());

    if (aModel == null) {
      LOG.warn(
          "nothing to do as the input model does not have required values, returning from evaluating this algorithm..");
      // null (rather than an empty list) is the established contract for "no model".
      return null;
    }

    List<MLCallbackResult> mlCallbackResults = new ArrayList<MLCallbackResult>();
    RealMatrix normalizedMat = normalizeData(inputData, aModel);

    UserCommandStatistics[] listStats = aModel.statistics();
    final int rowCount = normalizedMat.getRowDimension();
    final int colCount = normalizedMat.getColumnDimension();

    int colWithHighVariant = 0;
    for (int j = 0; j < colCount; j++) {
      if (!listStats[j].isLowVariant()) {
        colWithHighVariant++;
      }
    }

    // A plain HashMap on purpose: double-brace initialization would create an anonymous subclass
    // that keeps a reference to this detector instance alive inside every result.
    final Map<String, String> context = new HashMap<String, String>();
    context.put(UserProfileConstants.USER_TAG, user);
    context.put(UserProfileConstants.ALGORITHM_TAG, algorithm);

    // Pass 1: low-variant columns. Rows flagged here are skipped by the principal-component pass.
    boolean[] flaggedByLowVariant = new boolean[rowCount];
    for (int i = 0; i < rowCount; i++) {
      MLCallbackResult aResult = new MLCallbackResult();
      aResult.setContext(context);

      for (int j = 0; j < colCount; j++) {
        if (!listStats[j].isLowVariant()) {
          continue;
        }
        if (normalizedMat.getEntry(i, j) > listStats[j].getMean()) {
          flaggedByLowVariant[i] = true;
          aResult.setAnomaly(true);
          aResult.setTimestamp(userActivity.timestamp());
          aResult.setFeature(listStats[j].getCommandName());
          aResult.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
          aResult.setDatapoints(rowAsDatapoints(inputData.getRow(i)));
          aResult.setId(user);
        } else {
          aResult.setAnomaly(false);
          aResult.setTimestamp(userActivity.timestamp());
        }
        // NOTE(review): the same result object is appended once per low-variant column, so a row
        // with several low-variant columns appears several times and reflects the last column
        // processed. Kept as-is because callers may depend on it; confirm intent.
        mlCallbackResults.add(aResult);
      }
    }

    // Without any high-variant column there is nothing to project, and the matrix constructor
    // below rejects a zero column count, so stop with the low-variant results.
    if (colWithHighVariant == 0) {
      return mlCallbackResults;
    }

    // Copy only the high-variant columns, preserving their relative order.
    RealMatrix finalMatWithoutLowVariantFeatures =
        new Array2DRowRealMatrix(rowCount, colWithHighVariant);
    for (int i = 0; i < rowCount; i++) {
      int targetCol = 0;
      for (int j = 0; j < colCount; j++) {
        if (!listStats[j].isLowVariant()) {
          finalMatWithoutLowVariantFeatures.setEntry(i, targetCol, normalizedMat.getEntry(i, j));
          targetCol++;
        }
      }
    }

    RealVector[] pcs = aModel.principalComponents();
    RealMatrix finalInputMatTranspose = finalMatWithoutLowVariantFeatures.transpose();

    // Pass 2: principal components, only for rows the low-variant pass did not flag.
    for (int i = 0; i < rowCount; i++) {
      if (flaggedByLowVariant[i]) {
        continue;
      }
      MLCallbackResult result = new MLCallbackResult();
      result.setContext(context);
      // Row i of the filtered matrix as an (n x 1) column matrix.
      RealMatrix rowAsColumn = finalInputMatTranspose.getColumnMatrix(i);

      for (int sz = 0; sz < pcs.length; sz++) {
        RealMatrix pcMat = new Array2DRowRealMatrix(pcs[sz].toArray());
        // (pc * pc^T) * row, read back as a vector.
        RealVector projected =
            pcMat.multiply(pcMat.transpose()).multiply(rowAsColumn).getColumnVector(0);
        double distanceToPc = projected.getDistance(pcs[sz]);

        if (distanceToPc > aModel.maximumL2Norm().getEntry(sz)) {
          result.setAnomaly(true);
          // NOTE(review): the feature is looked up by principal-component index, and the
          // timestamp is the wall clock rather than userActivity.timestamp() as in pass 1;
          // both preserved from the existing behavior - confirm they are intended.
          result.setFeature(aModel.statistics()[sz].getCommandName());
          result.setTimestamp(System.currentTimeMillis());
          result.setAlgorithm(UserProfileConstants.EIGEN_DECOMPOSITION_ALGORITHM);
          result.setDatapoints(rowAsDatapoints(inputData.getRow(i)));
          result.setId(user);
        }
      }
      mlCallbackResults.add(result);
    }
    return mlCallbackResults;
  }

  /** Converts the values of one matrix row to their string form, preserving column order. */
  private static List<String> rowAsDatapoints(double[] rowVals) {
    List<String> datapoints = new ArrayList<String>(rowVals.length);
    for (double rowVal : rowVals) {
      datapoints.add(String.valueOf(rowVal));
    }
    return datapoints;
  }