예제 #1
0
파일: LOCI.java 프로젝트: fjfd/elki
  /**
   * Preprocessing step: determine the radii of interest for each point.
   *
   * @param ids IDs to process
   * @param rangeQuery Range query
   * @param interestingDistances Distances of interest
   */
  protected void precomputeInterestingRadii(
      DBIDs ids,
      RangeQuery<O> rangeQuery,
      WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress progressPreproc =
        LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
      // build list of critical distances
      DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
      {
        int i = 0;
        DoubleDBIDListIter ni = neighbors.iter();
        while (ni.valid()) {
          final double curdist = ni.doubleValue();
          ++i;
          ni.advance();
          // Skip, if tied to the next object:
          if (ni.valid() && curdist == ni.doubleValue()) {
            continue;
          }
          cdist.append(curdist, i);
          // Scale radius, and reinsert
          if (alpha != 1.) {
            final double ri = curdist / alpha;
            if (ri <= rmax) {
              cdist.append(ri, Integer.MIN_VALUE);
            }
          }
        }
      }
      cdist.sort();

      // fill the gaps to have fast lookups of number of neighbors at a given
      // distance.
      int lastk = 0;
      for (int i = 0, size = cdist.size(); i < size; i++) {
        final int k = cdist.getInt(i);
        if (k == Integer.MIN_VALUE) {
          cdist.setValue(i, lastk);
        } else {
          lastk = k;
        }
      }
      // TODO: shrink the list, removing duplicate radii?

      interestingDistances.put(iditer, cdist);
      LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
  }
예제 #2
0
파일: LOCI.java 프로젝트: fjfd/elki
  /**
   * Run the algorithm
   *
   * @param database Database to process
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();

    // LOCI preprocessing step
    WritableDataStore<DoubleIntArrayList> interestingDistances =
        DataStoreUtil.makeStorage(
            relation.getDBIDs(),
            DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED,
            DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI =
        LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();

    // Shared instance, to save allocations.
    MeanVariance mv_n_r_alpha = new MeanVariance();

    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      final DoubleIntArrayList cdist = interestingDistances.get(iditer);
      final double maxdist = cdist.getDouble(cdist.size() - 1);
      final int maxneig = cdist.getInt(cdist.size() - 1);

      double maxmdefnorm = 0.0;
      double maxnormr = 0;
      if (maxneig >= nmin) {
        // Compute the largest neighborhood we will need.
        DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
        // TODO: Ensure the result is sorted. This is currently implied.

        // For any critical distance, compute the normalized MDEF score.
        for (int i = 0, size = cdist.size(); i < size; i++) {
          // Only start when minimum size is fulfilled
          if (cdist.getInt(i) < nmin) {
            continue;
          }
          final double r = cdist.getDouble(i);
          final double alpha_r = alpha * r;
          // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
          final int n_alphar = cdist.getInt(cdist.find(alpha_r));
          // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
          mv_n_r_alpha.reset();
          for (DoubleDBIDListIter neighbor = maxneighbors.iter();
              neighbor.valid();
              neighbor.advance()) {
            // Stop at radius r
            if (neighbor.doubleValue() > r) {
              break;
            }
            DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
            int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
            mv_n_r_alpha.put(rn_alphar);
          }
          // We only use the average and standard deviation
          final double nhat_r_alpha = mv_n_r_alpha.getMean();
          final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();

          // Redundant divisions by nhat_r_alpha removed.
          final double mdef = nhat_r_alpha - n_alphar;
          final double sigmamdef = sigma_nhat_r_alpha;
          final double mdefnorm = mdef / sigmamdef;

          if (mdefnorm > maxmdefnorm) {
            maxmdefnorm = mdefnorm;
            maxnormr = r;
          }
        }
      } else {
        // FIXME: when nmin was not fulfilled - what is the proper value then?
        maxmdefnorm = Double.POSITIVE_INFINITY;
        maxnormr = maxdist;
      }
      mdef_norm.putDouble(iditer, maxmdefnorm);
      mdef_radius.putDouble(iditer, maxnormr);
      minmax.put(maxmdefnorm);
      LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta =
        new QuotientOutlierScoreMeta(
            minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(
        new MaterializedDoubleRelation(
            "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
  }