Example #1
 @Override
 public ProjectedIndex<O, O> instantiate(Relation<O> relation) {
   if (!proj.getInputDataTypeInformation()
       .isAssignableFromType(relation.getDataTypeInformation())) {
     return null;
   }
   proj.initialize(relation.getDataTypeInformation());
   final Relation<O> view;
   if (materialize) {
     DBIDs ids = relation.getDBIDs();
     WritableDataStore<O> content =
         DataStoreUtil.makeStorage(
             ids,
             DataStoreFactory.HINT_DB,
             proj.getOutputDataTypeInformation().getRestrictionClass());
     for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
       content.put(iter, proj.project(relation.get(iter)));
     }
     view =
         new MaterializedRelation<>(
             "ECEF Projection",
             "ecef-projection",
             proj.getOutputDataTypeInformation(),
             content,
             ids);
   } else {
     view = new ProjectedView<>(relation, proj);
   }
   Index inneri = inner.instantiate(view);
   if (inneri == null) {
     return null;
   }
   return new LngLatAsECEFIndex<>(relation, proj, view, inneri, norefine);
 }
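The instantiate method above first checks type compatibility, then either materializes the projected relation into a data store or wraps it in a lazy view, trading memory for repeated projection work. A minimal plain-Java sketch of that same decision; the ProjectionCache class, its toy projection, and all names here are illustrative, not the ELKI API:

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class ProjectionCache<K, V, P> {
  private final Function<V, P> project; // illustrative projection function
  private final Map<K, P> cache;        // null = project on every access

  ProjectionCache(Function<V, P> project, boolean materialize) {
    this.project = project;
    this.cache = materialize ? new HashMap<>() : null;
  }

  P get(K key, V value) {
    return cache != null
        ? cache.computeIfAbsent(key, k -> project.apply(value)) // compute once
        : project.apply(value);                                 // recompute lazily
  }

  public static void main(String[] args) {
    ProjectionCache<Integer, double[], Double> pc =
        new ProjectionCache<>(v -> v[0] + v[1], true); // toy projection: sum
    System.out.println(pc.get(0, new double[] { 1, 2 })); // 3.0
    System.out.println(pc.get(0, new double[] { 9, 9 })); // still 3.0: materialized
  }
}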
Example #2
 private void extractItemsets(
     DBIDs iset,
     DBIDs[] idx,
     int[] buf,
     int depth,
     int start,
     int minsupp,
     List<Itemset> solution) {
   // TODO: reuse arrays.
   for (int i = start; i < idx.length; i++) {
     if (idx[i] == null) {
       continue;
     }
     DBIDs ids = mergeJoin(iset, idx[i]);
     if (ids.size() < minsupp) {
       continue;
     }
     buf[depth] = i;
     int[] items = Arrays.copyOf(buf, depth + 1); // current prefix: depth + 1 items
     if (depth >= minlength) {
       solution.add(new SparseItemset(items, ids.size()));
     }
     if (depth < maxlength) {
       extractItemsets(ids, idx, buf, depth + 1, i + 1, minsupp, solution);
     }
   }
 }
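Here buf[0..depth] holds the current item prefix, so the itemset emitted at a given depth has depth + 1 items, and start ensures each set is only extended by larger item ids, enumerating every itemset exactly once. A self-contained sketch of the same depth-first expansion over transaction-id sets, using only the JDK; the item index and minsupp = 2 are made up for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.TreeSet;

public class EclatSketch {
  // Depth-first expansion: buf[0..depth] is the current prefix of item ids.
  static void expand(TreeSet<Integer> tids, List<TreeSet<Integer>> idx,
      int[] buf, int depth, int start, int minsupp, List<String> out) {
    for (int i = start; i < idx.size(); i++) {
      TreeSet<Integer> ids = new TreeSet<>(tids);
      ids.retainAll(idx.get(i)); // intersect transaction-id sets
      if (ids.size() < minsupp) continue;
      buf[depth] = i;
      out.add(Arrays.toString(Arrays.copyOf(buf, depth + 1)) + " support=" + ids.size());
      expand(ids, idx, buf, depth + 1, i + 1, minsupp, out);
    }
  }

  public static void main(String[] args) {
    // idx.get(item) = ids of the transactions containing that item.
    List<TreeSet<Integer>> idx = List.of(
        new TreeSet<>(List.of(0, 1, 2, 3)),
        new TreeSet<>(List.of(0, 1, 3)),
        new TreeSet<>(List.of(1, 2, 3)));
    int[] buf = new int[idx.size()];
    List<String> out = new ArrayList<>();
    for (int item = 0; item < idx.size(); item++) {
      buf[0] = item;
      out.add("[" + item + "] support=" + idx.get(item).size());
      expand(idx.get(item), idx, buf, 1, item + 1, 2, out);
    }
    out.forEach(System.out::println);
  }
}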
Example #3
  @Override
  public Clustering<KMeansModel> run(Database database, Relation<V> relation) {
    if (relation.size() <= 0) {
      return new Clustering<>("k-Means Clustering", "kmeans-clustering");
    }
    // Choose initial means
    if (LOG.isStatistics()) {
      LOG.statistics(new StringStatistic(KEY + ".initialization", initializer.toString()));
    }
    double[][] means = initializer.chooseInitialMeans(database, relation, k, getDistanceFunction());
    // Setup cluster assignment store
    List<ModifiableDBIDs> clusters = new ArrayList<>();
    for (int i = 0; i < k; i++) {
      clusters.add(DBIDUtil.newHashSet((int) (relation.size() * 2. / k)));
    }
    WritableIntegerDataStore assignment =
        DataStoreUtil.makeIntegerStorage(
            relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT, -1);
    double[] varsum = new double[k];

    IndefiniteProgress prog =
        LOG.isVerbose() ? new IndefiniteProgress("K-Means iteration", LOG) : null;
    DoubleStatistic varstat =
        LOG.isStatistics()
            ? new DoubleStatistic(this.getClass().getName() + ".variance-sum")
            : null;
    int iteration = 0;
    for (; maxiter <= 0 || iteration < maxiter; iteration++) {
      LOG.incrementProcessed(prog);
      boolean changed = assignToNearestCluster(relation, means, clusters, assignment, varsum);
      logVarstat(varstat, varsum);
      // Stop if no cluster assignment changed.
      if (!changed) {
        break;
      }
      // Recompute means.
      means = means(clusters, means, relation);
    }
    LOG.setCompleted(prog);
    if (LOG.isStatistics()) {
      LOG.statistics(new LongStatistic(KEY + ".iterations", iteration));
    }

    // Wrap result
    Clustering<KMeansModel> result = new Clustering<>("k-Means Clustering", "kmeans-clustering");
    for (int i = 0; i < clusters.size(); i++) {
      DBIDs ids = clusters.get(i);
      if (ids.size() == 0) {
        continue;
      }
      KMeansModel model = new KMeansModel(means[i], varsum[i]);
      result.addToplevelCluster(new Cluster<>(ids, model));
    }
    return result;
  }
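The main loop above is Lloyd's algorithm: assign every point to its nearest mean, recompute the means, and stop as soon as no assignment changes (or maxiter is reached). A minimal sketch of that iteration on raw double[][] data, independent of the ELKI relation and data-store machinery; the toy data and initialization are made up:

import java.util.Arrays;

public class KMeansSketch {
  // One assignment pass: returns true if any point changed cluster.
  static boolean assign(double[][] data, double[][] means, int[] assignment) {
    boolean changed = false;
    for (int p = 0; p < data.length; p++) {
      int best = 0;
      double bestDist = Double.POSITIVE_INFINITY;
      for (int c = 0; c < means.length; c++) {
        double d = 0;
        for (int j = 0; j < data[p].length; j++) {
          double diff = data[p][j] - means[c][j];
          d += diff * diff; // squared Euclidean distance
        }
        if (d < bestDist) { bestDist = d; best = c; }
      }
      changed |= assignment[p] != best;
      assignment[p] = best;
    }
    return changed;
  }

  static void recomputeMeans(double[][] data, double[][] means, int[] assignment) {
    int k = means.length, dim = data[0].length;
    double[][] sums = new double[k][dim];
    int[] counts = new int[k];
    for (int p = 0; p < data.length; p++) {
      counts[assignment[p]]++;
      for (int j = 0; j < dim; j++) sums[assignment[p]][j] += data[p][j];
    }
    for (int c = 0; c < k; c++) {
      if (counts[c] > 0) { // keep the old mean for empty clusters
        for (int j = 0; j < dim; j++) means[c][j] = sums[c][j] / counts[c];
      }
    }
  }

  public static void main(String[] args) {
    double[][] data = { {0, 0}, {0, 1}, {9, 9}, {10, 9} };
    double[][] means = { {0, 0}, {10, 9} }; // toy initialization
    int[] assignment = new int[data.length];
    Arrays.fill(assignment, -1);
    while (assign(data, means, assignment)) {
      recomputeMeans(data, means, assignment);
    }
    System.out.println(Arrays.toString(assignment)); // [0, 0, 1, 1]
  }
}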
Example #4
  /**
   * Run the algorithm on the given relation.
   *
   * @param database Database
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<? extends NumberVector> relation) {
    @SuppressWarnings("unchecked")
    PrimitiveDistanceQuery<? super NumberVector> distq =
        (PrimitiveDistanceQuery<? super NumberVector>)
            database.getDistanceQuery(relation, distanceFunction);
    Collection<? extends NumberVector> refPoints = refp.getReferencePoints(relation);
    if (refPoints.size() < 1) {
      throw new AbortException("Cannot compute ROS without reference points!");
    }

    DBIDs ids = relation.getDBIDs();
    if (k >= ids.size()) {
      throw new AbortException("k must not be chosen larger than the database size!");
    }
    // storage of distance/score values.
    WritableDoubleDataStore rbod_score =
        DataStoreUtil.makeDoubleStorage(
            ids, DataStoreFactory.HINT_STATIC | DataStoreFactory.HINT_HOT, Double.NaN);

    // Compute density estimation:
    for (NumberVector refPoint : refPoints) {
      DoubleDBIDList referenceDists = computeDistanceVector(refPoint, relation, distq);
      updateDensities(rbod_score, referenceDists);
    }
    // compute maximum density
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      mm.put(rbod_score.doubleValue(iditer));
    }
    // compute ROS
    double scale = mm.getMax() > 0. ? 1. / mm.getMax() : 1.;
    mm.reset(); // Reuse
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      double score = 1 - (rbod_score.doubleValue(iditer) * scale);
      mm.put(score);
      rbod_score.putDouble(iditer, score);
    }

    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "Reference-points Outlier Scores",
            "reference-outlier",
            rbod_score,
            relation.getDBIDs());
    OutlierScoreMeta scoreMeta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax(), 0., 1., 0.);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    // Add the reference points to the result as header information,
    // so the visualizer can find them in the result.
    result.addChildResult(
        new ReferencePointsResult<>("Reference points", "reference-points", refPoints));
    return result;
  }
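The last pass turns the accumulated density estimates into outlier scores in [0, 1]: the maximum density maps to score 0 and a density of zero maps to score 1, which is why a separate DoubleMinMax pass finds the maximum first. The transformation in isolation, as a small standalone sketch with made-up density values:

public class ScoreInversion {
  public static void main(String[] args) {
    double[] densities = { 0.2, 0.9, 0.0, 0.45 };
    double max = 0.;
    for (double d : densities) max = Math.max(max, d);
    double scale = max > 0. ? 1. / max : 1.;
    for (double d : densities) {
      // 1.0 = most outlying (zero density), 0.0 = densest point.
      System.out.println(1 - d * scale);
    }
  }
}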
Example #5
File: LOCI.java Project: fjfd/elki
  /**
   * Preprocessing step: determine the radii of interest for each point.
   *
   * @param ids IDs to process
   * @param rangeQuery Range query
   * @param interestingDistances Distances of interest
   */
  protected void precomputeInterestingRadii(
      DBIDs ids,
      RangeQuery<O> rangeQuery,
      WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress progressPreproc =
        LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
      // build list of critical distances
      DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
      {
        int i = 0;
        DoubleDBIDListIter ni = neighbors.iter();
        while (ni.valid()) {
          final double curdist = ni.doubleValue();
          ++i;
          ni.advance();
          // Skip, if tied to the next object:
          if (ni.valid() && curdist == ni.doubleValue()) {
            continue;
          }
          cdist.append(curdist, i);
          // Scale radius, and reinsert
          if (alpha != 1.) {
            final double ri = curdist / alpha;
            if (ri <= rmax) {
              cdist.append(ri, Integer.MIN_VALUE);
            }
          }
        }
      }
      cdist.sort();

      // fill the gaps to have fast lookups of number of neighbors at a given
      // distance.
      int lastk = 0;
      for (int i = 0, size = cdist.size(); i < size; i++) {
        final int k = cdist.getInt(i);
        if (k == Integer.MIN_VALUE) {
          cdist.setValue(i, lastk);
        } else {
          lastk = k;
        }
      }
      // TODO: shrink the list, removing duplicate radii?

      interestingDistances.put(iditer, cdist);
      LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
  }
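For each point, the sorted neighbor distances become a list of (radius, neighbor count) pairs; the alpha-scaled radii are pre-inserted with an Integer.MIN_VALUE marker and patched in the gap-filling pass, so that n(p, \alpha r) can later be read off with a single lookup. A standalone sketch of those two passes on one point's sorted distances; alpha, rmax, and the distances are made up:

import java.util.ArrayList;
import java.util.List;

public class CriticalDistances {
  public static void main(String[] args) {
    double[] ndists = { 1.0, 2.0, 2.0, 3.0 }; // sorted neighbor distances
    double alpha = 0.5, rmax = 10.;
    List<double[]> cdist = new ArrayList<>(); // (radius, neighbor count) pairs
    for (int i = 0; i < ndists.length; i++) {
      // Skip if tied with the next distance.
      if (i + 1 < ndists.length && ndists[i] == ndists[i + 1]) continue;
      cdist.add(new double[] { ndists[i], i + 1 });
      double ri = ndists[i] / alpha; // the radius r with alpha * r = this distance
      if (ri <= rmax) cdist.add(new double[] { ri, Integer.MIN_VALUE });
    }
    cdist.sort((a, b) -> Double.compare(a[0], b[0]));
    // Gap filling: each placeholder inherits the count of the last real entry.
    int lastk = 0;
    for (double[] e : cdist) {
      if ((int) e[1] == Integer.MIN_VALUE) e[1] = lastk;
      else lastk = (int) e[1];
    }
    cdist.forEach(e -> System.out.println("r=" + e[0] + " n=" + (int) e[1]));
  }
}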
Example #6
 // TODO: implement diffsets.
 private void extractItemsets(DBIDs[] idx, int start, int minsupp, List<Itemset> solution) {
   int[] buf = new int[idx.length];
   DBIDs iset = idx[start];
   if (iset == null || iset.size() < minsupp) {
     return;
   }
   if (minlength <= 1) {
     solution.add(new OneItemset(start, iset.size()));
   }
   if (maxlength > 1) {
     buf[0] = start;
     extractItemsets(iset, idx, buf, 1, start + 1, minsupp, solution);
   }
 }
Example #7
 /**
  * Compute the intersection size.
  *
  * @param neighbors1 SORTED neighbor ids of first
  * @param neighbors2 SORTED neighbor ids of second
  * @return Intersection size
  */
 protected static int countSharedNeighbors(DBIDs neighbors1, DBIDs neighbors2) {
   int intersection = 0;
   DBIDIter iter1 = neighbors1.iter();
   DBIDIter iter2 = neighbors2.iter();
   while (iter1.valid() && iter2.valid()) {
     final int comp = DBIDUtil.compare(iter1, iter2);
     if (comp == 0) {
       intersection++;
       iter1.advance();
       iter2.advance();
     } else if (comp < 0) {
       iter1.advance();
      } else { // iter2 < iter1
        iter2.advance();
      }
   }
   return intersection;
 }
Example #8
  private DBIDs mergeJoin(DBIDs first, DBIDs second) {
    assert (!(first instanceof HashSetDBIDs));
    assert (!(second instanceof HashSetDBIDs));
    ArrayModifiableDBIDs ids = DBIDUtil.newArray();

    DBIDIter i1 = first.iter(), i2 = second.iter();
    while (i1.valid() && i2.valid()) {
      int c = DBIDUtil.compare(i1, i2);
      if (c < 0) {
        i1.advance();
      } else if (c > 0) {
        i2.advance();
      } else {
        ids.add(i1);
        i1.advance();
        i2.advance();
      }
    }
    return ids;
  }
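countSharedNeighbors in Example #7 and mergeJoin here are the same two-pointer merge over sorted sequences, one counting matches and the other collecting them; that is also why the asserts above reject hash-based (unsorted) DBID sets. The pattern on plain sorted int arrays:

import java.util.Arrays;

public class MergeJoinSketch {
  // Intersect two SORTED arrays in O(n + m).
  static int[] mergeJoin(int[] a, int[] b) {
    int[] out = new int[Math.min(a.length, b.length)];
    int i = 0, j = 0, n = 0;
    while (i < a.length && j < b.length) {
      int c = Integer.compare(a[i], b[j]);
      if (c < 0) i++;           // a is behind, advance it
      else if (c > 0) j++;      // b is behind, advance it
      else { out[n++] = a[i]; i++; j++; } // match: keep and advance both
    }
    return Arrays.copyOf(out, n);
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(
        mergeJoin(new int[] { 1, 3, 5, 7 }, new int[] { 2, 3, 4, 7, 9 }))); // [3, 7]
  }
}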
Example #9
  /**
   * Inserts the specified objects into this index. If bulk loading is supported, the objects
   * are inserted in a single bulk operation.
   *
   * @param ids the objects to be inserted
   */
  @Override
  public void insertAll(DBIDs ids) {
    if (ids.isEmpty()) {
      return;
    }

    // Bulk load if supported; otherwise insert one object at a time.
    if (canBulkLoad()) {
      List<SpatialEntry> leafs = new ArrayList<>(ids.size());
      for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        leafs.add(createNewLeafEntry(iter));
      }
      bulkLoad(leafs);
    } else {
      for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
        insert(DBIDUtil.deref(iter));
      }
    }

    doExtraIntegrityChecks();
  }
Example #10
 /**
  * Utility method to test if a given dimension is relevant as determined via a set of reference
  * points (i.e. if the value spread, max - min, along the attribute does not exceed the width
  * threshold <code>w</code>).
  *
  * @param dimension the dimension to test.
  * @param relation used to get actual values for DBIDs.
  * @param points the points to test.
  * @return <code>true</code> if the dimension is relevant.
  */
 private boolean dimensionIsRelevant(int dimension, Relation<V> relation, DBIDs points) {
   double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;
   for (DBIDIter iter = points.iter(); iter.valid(); iter.advance()) {
     double xV = relation.get(iter).doubleValue(dimension);
     min = (xV < min) ? xV : min;
     max = (xV > max) ? xV : max;
     if (max - min > w) {
       return false;
     }
   }
   return true;
 }
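Note the early exit: the running min and max are updated per point, and the method fails as soon as the spread exceeds the width w, without scanning the remaining points. The same check stripped down to a plain array; the values and w are made up:

public class RelevanceCheck {
  // True if all values lie within a window of width w.
  static boolean dimensionIsRelevant(double[] values, double w) {
    double min = Double.POSITIVE_INFINITY, max = Double.NEGATIVE_INFINITY;
    for (double v : values) {
      min = Math.min(min, v);
      max = Math.max(max, v);
      if (max - min > w) return false; // spread too large, stop early
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(dimensionIsRelevant(new double[] { 1.0, 1.2, 0.9 }, 0.5)); // true
    System.out.println(dimensionIsRelevant(new double[] { 1.0, 2.2, 0.9 }, 0.5)); // false
  }
}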
Example #11
  /**
   * Run the ODIN algorithm
   *
   * @param database Database to run on.
   * @param relation Relation to process.
   * @return ODIN outlier result.
   */
  public OutlierResult run(Database database, Relation<O> relation) {
    // Get the query functions:
    DistanceQuery<O> dq = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnq = database.getKNNQuery(dq, k);

    // Get the objects to process, and a data storage for counting and output:
    DBIDs ids = relation.getDBIDs();
    WritableDoubleDataStore scores =
        DataStoreUtil.makeDoubleStorage(ids, DataStoreFactory.HINT_DB, 0.);

    double inc = 1. / (k - 1);
    double min = Double.POSITIVE_INFINITY, max = 0.0;
    // Process all objects
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      // Find the nearest neighbors (using an index, if available!)
      DBIDs neighbors = knnq.getKNNForDBID(iter, k);
      // For each neighbor, except ourselves, increase the in-degree:
      for (DBIDIter nei = neighbors.iter(); nei.valid(); nei.advance()) {
        if (DBIDUtil.equal(iter, nei)) {
          continue;
        }
        final double value = scores.doubleValue(nei) + inc;
        if (value < min) {
          min = value;
        }
        if (value > max) {
          max = value;
        }
        scores.put(nei, value);
      }
    }

    // Wrap the result and add metadata.
    OutlierScoreMeta meta = new InvertedOutlierScoreMeta(min, max, 0., inc * (ids.size() - 1), 1);
    DoubleRelation rel = new MaterializedDoubleRelation("ODIN In-Degree", "odin", scores, ids);
    return new OutlierResult(meta, rel);
  }
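ODIN scores each object by its (weighted) in-degree in the kNN graph: objects that rarely appear among other objects' nearest neighbors get low scores and are flagged as outliers, hence the inverted score meta. A toy version on a precomputed neighbor table; the table and k are made up:

public class OdinSketch {
  public static void main(String[] args) {
    // knn[i] = indices of the nearest neighbors of object i (self excluded).
    int[][] knn = { {1, 2}, {0, 2}, {1, 0}, {2, 1} };
    int k = 3; // as in the ELKI code, k counts the query point itself
    double inc = 1. / (k - 1);
    double[] score = new double[knn.length];
    for (int[] neighbors : knn) {
      for (int n : neighbors) {
        score[n] += inc; // increase the in-degree of each neighbor
      }
    }
    // Low score = few objects consider this one a neighbor = more outlying.
    for (int i = 0; i < score.length; i++) {
      System.out.println("object " + i + ": " + score[i]); // object 3 scores 0.0
    }
  }
}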
Example #12
File: SOF.java Project: 4sp1r3/elki
  /**
   * The main run method
   *
   * @param database Database to use (actually unused)
   * @param spatial Relation for neighborhood
   * @param relation Attributes to evaluate
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<N> spatial, Relation<O> relation) {
    final NeighborSetPredicate npred =
        getNeighborSetPredicateFactory().instantiate(database, spatial);
    DistanceQuery<O> distFunc = getNonSpatialDistanceFunction().instantiate(relation);

    WritableDoubleDataStore lrds =
        DataStoreUtil.makeDoubleStorage(
            relation.getDBIDs(), DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_HOT);
    WritableDoubleDataStore lofs =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax lofminmax = new DoubleMinMax();

    // Compute densities
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      DBIDs neighbors = npred.getNeighborDBIDs(iditer);
      double avg = 0;
      for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        avg += distFunc.distance(iditer, iter);
      }
      double lrd = 1 / (avg / neighbors.size());
      if (Double.isNaN(lrd)) {
        lrd = 0;
      }
      lrds.putDouble(iditer, lrd);
    }

    // Compute density quotients
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      DBIDs neighbors = npred.getNeighborDBIDs(iditer);
      double avg = 0;
      for (DBIDIter iter = neighbors.iter(); iter.valid(); iter.advance()) {
        avg += lrds.doubleValue(iter);
      }
      final double lrd = (avg / neighbors.size()) / lrds.doubleValue(iditer);
      if (!Double.isNaN(lrd)) {
        lofs.putDouble(iditer, lrd);
        lofminmax.put(lrd);
      } else {
        lofs.putDouble(iditer, 0.0);
      }
    }

    // Build result representation.
    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "Spatial Outlier Factor", "sof-outlier", lofs, relation.getDBIDs());
    OutlierScoreMeta scoreMeta =
        new QuotientOutlierScoreMeta(
            lofminmax.getMin(), lofminmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 1.0);
    OutlierResult or = new OutlierResult(scoreMeta, scoreResult);
    or.addChildResult(npred);
    return or;
  }
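SOF follows the LOF recipe with a spatial neighborhood: first a density per object as the reciprocal of the average non-spatial distance to its neighbors (lrd), then a score as the ratio of the neighbors' average density to the object's own. Both passes on plain arrays; the toy distances and neighbor lists are made up:

public class SofSketch {
  public static void main(String[] args) {
    // dist[i][j] = non-spatial distance; nbr[i] = spatial neighbors of i.
    double[][] dist = { {0, 1, 4}, {1, 0, 3}, {4, 3, 0} };
    int[][] nbr = { {1, 2}, {0, 2}, {0, 1} };
    int n = nbr.length;
    double[] lrd = new double[n];
    for (int i = 0; i < n; i++) { // density = 1 / mean distance to neighbors
      double avg = 0;
      for (int j : nbr[i]) avg += dist[i][j];
      double d = avg / nbr[i].length;
      lrd[i] = d > 0 ? 1 / d : 0;
    }
    for (int i = 0; i < n; i++) { // score = neighbors' mean density / own density
      double avg = 0;
      for (int j : nbr[i]) avg += lrd[j];
      double sof = (avg / nbr[i].length) / lrd[i];
      System.out.println("object " + i + ": " + (Double.isNaN(sof) ? 0.0 : sof));
    }
  }
}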
Example #13
  /**
   * Evaluate a single outlier result as a histogram.
   *
   * @param database Database to process
   * @param or Outlier result
   * @return Result
   */
  public HistogramResult<DoubleVector> evaluateOutlierResult(Database database, OutlierResult or) {
    if (scaling instanceof OutlierScalingFunction) {
      OutlierScalingFunction oscaling = (OutlierScalingFunction) scaling;
      oscaling.prepare(or);
    }

    ModifiableDBIDs ids = DBIDUtil.newHashSet(or.getScores().getDBIDs());
    DBIDs outlierIds = DatabaseUtil.getObjectsByLabelMatch(database, positiveClassName);
    // Each histogram bin accumulates a pair: first component for inliers, second for outliers.
    // If we have useful (finite) min/max, use these for binning.
    double min = scaling.getMin();
    double max = scaling.getMax();
    final ObjHistogram<DoubleDoublePair> hist;
    if (Double.isInfinite(min)
        || Double.isNaN(min)
        || Double.isInfinite(max)
        || Double.isNaN(max)) {
      hist =
          new AbstractObjDynamicHistogram<DoubleDoublePair>(bins) {
            @Override
            public DoubleDoublePair aggregate(DoubleDoublePair first, DoubleDoublePair second) {
              first.first += second.first;
              first.second += second.second;
              return first;
            }

            @Override
            protected DoubleDoublePair makeObject() {
              return new DoubleDoublePair(0., 0.);
            }

            @Override
            protected DoubleDoublePair cloneForCache(DoubleDoublePair data) {
              return new DoubleDoublePair(data.first, data.second);
            }

            @Override
            protected DoubleDoublePair downsample(Object[] data, int start, int end, int size) {
              DoubleDoublePair sum = new DoubleDoublePair(0, 0);
              for (int i = start; i < end; i++) {
                DoubleDoublePair p = (DoubleDoublePair) data[i];
                if (p != null) {
                  sum.first += p.first;
                  sum.second += p.second;
                }
              }
              return sum;
            }
          };
    } else {
      hist =
          new AbstractObjStaticHistogram<DoubleDoublePair>(bins, min, max) {
            @Override
            protected DoubleDoublePair makeObject() {
              return new DoubleDoublePair(0., 0.);
            }

            @Override
            public void putData(double coord, DoubleDoublePair data) {
              DoubleDoublePair exist = get(coord);
              exist.first += data.first;
              exist.second += data.second;
            }
          };
    }

    // Per-object weights: 'negative' is added for inliers, 'positive' for outliers.
    DoubleDoublePair negative, positive;
    if (!splitfreq) {
      negative = new DoubleDoublePair(1. / ids.size(), 0);
      positive = new DoubleDoublePair(0, 1. / ids.size());
    } else {
      negative = new DoubleDoublePair(1. / (ids.size() - outlierIds.size()), 0);
      positive = new DoubleDoublePair(0, 1. / outlierIds.size());
    }
    ids.removeDBIDs(outlierIds);
    // fill histogram with values of each object
    for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
      double result = or.getScores().doubleValue(iter);
      result = scaling.getScaled(result);
      if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
        hist.putData(result, negative);
      }
    }
    for (DBIDIter iter = outlierIds.iter(); iter.valid(); iter.advance()) {
      double result = or.getScores().doubleValue(iter);
      result = scaling.getScaled(result);
      if (result > Double.NEGATIVE_INFINITY && result < Double.POSITIVE_INFINITY) {
        hist.putData(result, positive);
      }
    }
    Collection<DoubleVector> collHist = new ArrayList<>(hist.getNumBins());
    for (ObjHistogram.Iter<DoubleDoublePair> iter = hist.iter(); iter.valid(); iter.advance()) {
      DoubleDoublePair data = iter.getValue();
      DoubleVector row = new DoubleVector(new double[] {iter.getCenter(), data.first, data.second});
      collHist.add(row);
    }
    return new HistogramResult<>("Outlier Score Histogram", "outlier-histogram", collHist);
  }
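The two anonymous histogram classes differ only in binning: a fixed-width histogram when the scaling provides finite min/max, and a dynamically rebinning one otherwise; every bin accumulates an (inlier mass, outlier mass) pair. The fixed-width case in miniature; the scores, labels, and weights are made up:

public class ScoreHistogram {
  public static void main(String[] args) {
    int bins = 4;
    double min = 0., max = 1.;
    double[][] hist = new double[bins][2]; // [bin][0] = inlier mass, [1] = outlier mass
    double[] scores = { 0.1, 0.2, 0.55, 0.9 };
    boolean[] outlier = { false, false, true, true };
    double inlierW = 1. / 2, outlierW = 1. / 2; // splitfreq-style weights
    for (int i = 0; i < scores.length; i++) {
      int bin = Math.min((int) ((scores[i] - min) / (max - min) * bins), bins - 1);
      hist[bin][outlier[i] ? 1 : 0] += outlier[i] ? outlierW : inlierW;
    }
    for (int b = 0; b < bins; b++) {
      double center = min + (b + 0.5) * (max - min) / bins; // bin center, as in the output rows
      System.out.println(center + " " + hist[b][0] + " " + hist[b][1]);
    }
  }
}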
Example #14
 @Override
 public void deleteAll(DBIDs ids) {
   for (DBIDIter iter = ids.iter(); iter.valid(); iter.advance()) {
     delete(iter);
   }
 }
Example #15
File: LOCI.java Project: fjfd/elki
  /**
   * Run the algorithm
   *
   * @param database Database to process
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();

    // LOCI preprocessing step
    WritableDataStore<DoubleIntArrayList> interestingDistances =
        DataStoreUtil.makeStorage(
            relation.getDBIDs(),
            DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED,
            DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI =
        LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();

    // Shared instance, to save allocations.
    MeanVariance mv_n_r_alpha = new MeanVariance();

    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      final DoubleIntArrayList cdist = interestingDistances.get(iditer);
      final double maxdist = cdist.getDouble(cdist.size() - 1);
      final int maxneig = cdist.getInt(cdist.size() - 1);

      double maxmdefnorm = 0.0;
      double maxnormr = 0;
      if (maxneig >= nmin) {
        // Compute the largest neighborhood we will need.
        DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
        // TODO: Ensure the result is sorted. This is currently implied.

        // For any critical distance, compute the normalized MDEF score.
        for (int i = 0, size = cdist.size(); i < size; i++) {
          // Only start when minimum size is fulfilled
          if (cdist.getInt(i) < nmin) {
            continue;
          }
          final double r = cdist.getDouble(i);
          final double alpha_r = alpha * r;
          // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
          final int n_alphar = cdist.getInt(cdist.find(alpha_r));
          // compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
          mv_n_r_alpha.reset();
          for (DoubleDBIDListIter neighbor = maxneighbors.iter();
              neighbor.valid();
              neighbor.advance()) {
            // Stop at radius r
            if (neighbor.doubleValue() > r) {
              break;
            }
            DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
            int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
            mv_n_r_alpha.put(rn_alphar);
          }
          // We only use the average and standard deviation
          final double nhat_r_alpha = mv_n_r_alpha.getMean();
          final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();

          // Redundant divisions by nhat_r_alpha removed.
          final double mdef = nhat_r_alpha - n_alphar;
          final double sigmamdef = sigma_nhat_r_alpha;
          final double mdefnorm = mdef / sigmamdef;

          if (mdefnorm > maxmdefnorm) {
            maxmdefnorm = mdefnorm;
            maxnormr = r;
          }
        }
      } else {
        // FIXME: when nmin was not fulfilled - what is the proper value then?
        maxmdefnorm = Double.POSITIVE_INFINITY;
        maxnormr = maxdist;
      }
      mdef_norm.putDouble(iditer, maxmdefnorm);
      mdef_radius.putDouble(iditer, maxnormr);
      minmax.put(maxmdefnorm);
      LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta =
        new QuotientOutlierScoreMeta(
            minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(
        new MaterializedDoubleRelation(
            "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
  }
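The "redundant divisions removed" comment refers to the LOCI definitions MDEF = 1 - n(p_i, \alpha r) / \hat{n}(p_i, r, \alpha) and \sigma_{MDEF} = \sigma_{\hat{n}} / \hat{n}: dividing one by the other cancels \hat{n}, so the normalized score the loop computes reduces to (\hat{n} - n) / \sigma. A quick check with made-up counts:

public class MdefSketch {
  public static void main(String[] args) {
    double nhat = 12.0;    // mean neighbor count \hat{n}(p, r, \alpha)
    double sigma = 2.0;    // its standard deviation \sigma_{\hat{n}}
    double n_alphar = 4.0; // n(p, \alpha * r) for the point itself
    double mdef = 1 - n_alphar / nhat;             // textbook MDEF
    double sigmaMdef = sigma / nhat;               // textbook \sigma_{MDEF}
    System.out.println(mdef / sigmaMdef);          // 4.0
    System.out.println((nhat - n_alphar) / sigma); // same value, fewer divisions
  }
}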
Example #16
  /**
   * Performs a single run of DOC, finding a single cluster.
   *
   * @param database Database context
   * @param relation used to get actual values for DBIDs.
   * @param S The set of points we're working on.
   * @param d Dimensionality of the data set we're currently working on.
   * @param n Number of outer iterations (seed points).
   * @param m Number of inner iterations (per seed point).
   * @param r Size of random samples.
   * @param minClusterSize Minimum size a cluster must have to be accepted.
   * @return a cluster, if one is found, else <code>null</code>.
   */
  private Cluster<SubspaceModel> runDOC(
      Database database,
      Relation<V> relation,
      ArrayModifiableDBIDs S,
      final int d,
      int n,
      int m,
      int r,
      int minClusterSize) {
    // Best cluster for the current run.
    DBIDs C = null;
    // Relevant attributes for the best cluster.
    long[] D = null;
    // Quality of the best cluster.
    double quality = Double.NEGATIVE_INFINITY;

    // Bounds for our cluster.
    // ModifiableHyperBoundingBox bounds = new ModifiableHyperBoundingBox(new
    // double[d], new double[d]);

    // Weights for distance (= rectangle query)
    SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(BitsUtil.zero(d));
    DistanceQuery<V> dq = database.getDistanceQuery(relation, df);
    RangeQuery<V> rq = database.getRangeQuery(dq);

    // Inform the user about the progress in the current iteration.
    FiniteProgress iprogress =
        LOG.isVerbose()
            ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG)
            : null;

    Random random = rnd.getSingleThreadedRandom();
    DBIDArrayIter iter = S.iter();

    for (int i = 0; i < n; ++i) {
      // Pick a random seed point.
      iter.seek(random.nextInt(S.size()));

      for (int j = 0; j < m; ++j) {
        // Choose a set of random points.
        DBIDs randomSet = DBIDUtil.randomSample(S, r, random);

        // Initialize cluster info.
        long[] nD = BitsUtil.zero(d);

        // Test each dimension and build bounding box.
        for (int k = 0; k < d; ++k) {
          if (dimensionIsRelevant(k, relation, randomSet)) {
            BitsUtil.setI(nD, k);
          }
        }
        if (BitsUtil.cardinality(nD) > 0) {
          // Get all points in the box.
          df.setSelectedDimensions(nD);
          // TODO: add filtering capabilities into query API!
          DBIDs nC = DBIDUtil.intersection(S, rq.getRangeForDBID(iter, w));

          if (LOG.isDebuggingFiner()) {
            LOG.finer(
                "Testing a cluster candidate, |C| = "
                    + nC.size()
                    + ", |D| = "
                    + BitsUtil.cardinality(nD));
          }

          // Is the cluster large enough?
          if (nC.size() < minClusterSize) {
            // Too small.
            if (LOG.isDebuggingFiner()) {
              LOG.finer("... but it's too small.");
            }
          } else {
            // Better cluster than before?
            double nQuality = computeClusterQuality(nC.size(), BitsUtil.cardinality(nD));
            if (nQuality > quality) {
              if (LOG.isDebuggingFiner()) {
                LOG.finer("... and it's the best so far: " + nQuality + " vs. " + quality);
              }
              C = nC;
              D = nD;
              quality = nQuality;
            } else {
              if (LOG.isDebuggingFiner()) {
                LOG.finer("... but we already have a better one.");
              }
            }
          }
        }
        LOG.incrementProcessed(iprogress);
      }
    }
    LOG.ensureCompleted(iprogress);

    return (C != null) ? makeCluster(relation, C, D) : null;
  }
Example #17
  /**
   * Performs a single run of FastDOC, finding a single cluster.
   *
   * @param database Database context
   * @param relation used to get actual values for DBIDs.
   * @param S The set of points we're working on.
   * @param d Dimensionality of the data set we're currently working on.
   * @param n Number of outer iterations (seed points).
   * @param m Number of inner iterations (per seed point).
   * @param r Size of random samples.
   * @return a cluster, if one is found, else <code>null</code>.
   */
  private Cluster<SubspaceModel> runFastDOC(
      Database database, Relation<V> relation, ArrayModifiableDBIDs S, int d, int n, int m, int r) {
    // Relevant attributes of highest cardinality.
    long[] D = null;
    // The seed point for the best dimensions.
    DBIDVar dV = DBIDUtil.newVar();

    // Inform the user about the progress in the current iteration.
    FiniteProgress iprogress =
        LOG.isVerbose()
            ? new FiniteProgress("Iteration progress for current cluster", m * n, LOG)
            : null;

    Random random = rnd.getSingleThreadedRandom();

    DBIDArrayIter iter = S.iter();
    outer:
    for (int i = 0; i < n; ++i) {
      // Pick a random seed point.
      iter.seek(random.nextInt(S.size()));

      for (int j = 0; j < m; ++j) {
        // Choose a set of random points.
        DBIDs randomSet = DBIDUtil.randomSample(S, r, random);

        // Initialize cluster info.
        long[] nD = BitsUtil.zero(d);

        // Test each dimension.
        for (int k = 0; k < d; ++k) {
          if (dimensionIsRelevant(k, relation, randomSet)) {
            BitsUtil.setI(nD, k);
          }
        }

        if (D == null || BitsUtil.cardinality(nD) > BitsUtil.cardinality(D)) {
          D = nD;
          dV.set(iter);

          if (BitsUtil.cardinality(D) >= d_zero) {
            if (iprogress != null) {
              iprogress.setProcessed(iprogress.getTotal(), LOG);
            }
            break outer;
          }
        }
        LOG.incrementProcessed(iprogress);
      }
    }
    LOG.ensureCompleted(iprogress);

    // If no relevant dimensions were found, skip it.
    if (D == null || BitsUtil.cardinality(D) == 0) {
      return null;
    }

    // Get all points in the box.
    SubspaceMaximumDistanceFunction df = new SubspaceMaximumDistanceFunction(D);
    DistanceQuery<V> dq = database.getDistanceQuery(relation, df);
    RangeQuery<V> rq = database.getRangeQuery(dq, DatabaseQuery.HINT_SINGLE);

    // TODO: add filtering capabilities into query API!
    DBIDs C = DBIDUtil.intersection(S, rq.getRangeForDBID(dV, w));

    // If we have a non-empty cluster, return it.
    return (C.size() > 0) ? makeCluster(relation, C, D) : null;
  }