Example #1
0
File: LOCI.java Project: fjfd/elki
  /**
   * Preprocessing step: determine the radii of interest for each point.
   *
   * @param ids IDs to process
   * @param rangeQuery Range query
   * @param interestingDistances Distances of interest
   */
  protected void precomputeInterestingRadii(
      DBIDs ids,
      RangeQuery<O> rangeQuery,
      WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress progressPreproc =
        LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
      // build list of critical distances
      DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
      {
        int i = 0;
        DoubleDBIDListIter ni = neighbors.iter();
        while (ni.valid()) {
          final double curdist = ni.doubleValue();
          ++i;
          ni.advance();
          // Skip, if tied to the next object:
          if (ni.valid() && curdist == ni.doubleValue()) {
            continue;
          }
          cdist.append(curdist, i);
          // Scale radius, and reinsert
          if (alpha != 1.) {
            final double ri = curdist / alpha;
            if (ri <= rmax) {
              cdist.append(ri, Integer.MIN_VALUE);
            }
          }
        }
      }
      cdist.sort();

      // fill the gaps to have fast lookups of number of neighbors at a given
      // distance.
      int lastk = 0;
      for (int i = 0, size = cdist.size(); i < size; i++) {
        final int k = cdist.getInt(i);
        if (k == Integer.MIN_VALUE) {
          cdist.setValue(i, lastk);
        } else {
          lastk = k;
        }
      }
      // TODO: shrink the list, removing duplicate radii?

      interestingDistances.put(iditer, cdist);
      LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
  }
 @Override
 public String toString() {
   StringBuilder buf = new StringBuilder();
   buf.append("kNNList[");
   for (DoubleDBIDListIter iter = this.iter(); iter.valid(); ) {
     buf.append(iter.doubleValue()).append(':').append(DBIDUtil.toString(iter));
     iter.advance();
     if (iter.valid()) {
       buf.append(',');
     }
   }
   buf.append(']');
   return buf.toString();
 }
  @Override
  public void run() {
    Database database = input.getDatabase();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);

    // open file.
    try (RandomAccessFile file = new RandomAccessFile(out, "rw");
        FileChannel channel = file.getChannel();
        // and acquire a file write lock
        FileLock lock = channel.lock()) {
      // write magic header
      file.writeInt(KNN_CACHE_MAGIC);

      int bufsize = k * 12 * 2 + 10; // Initial size, enough for 2 kNN.
      ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);

      FiniteProgress prog =
          LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;

      for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final KNNList nn = knnQ.getKNNForDBID(it, k);
        final int nnsize = nn.size();

        // Grow the buffer when needed:
        if (nnsize * 12 + 10 > bufsize) {
          while (nnsize * 12 + 10 > bufsize) {
            bufsize <<= 1;
          }
          buffer = ByteBuffer.allocateDirect(bufsize);
        }

        buffer.clear();
        ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
        ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
        int c = 0;
        for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
          ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
          buffer.putDouble(ni.doubleValue());
        }
        if (c != nn.size()) {
          throw new AbortException("Sizes did not agree. Cache is invalid.");
        }

        buffer.flip();
        channel.write(buffer);
        LOG.incrementProcessed(prog);
      }
      LOG.ensureCompleted(prog);
      lock.release();
    } catch (IOException e) {
      LOG.exception(e);
    }
    // FIXME: close!
  }
Example #4
0
 /**
  * Refine a range query.
  *
  * @param neighc Original result
  * @param adjustedEps New epsilon
  * @return refined list
  */
 private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
   ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
   // We don't have a guarantee for this list to be sorted
   for (DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
     DoubleDBIDPair p = neighbor.getPair();
     double dist = p.doubleValue();
     if (dist <= adjustedEps) {
       n.add(dist, p);
     }
   }
   return n;
 }
Example #5
0
    /**
     * Compute density in the given subspace.
     *
     * @param subspace Subspace
     * @param neighbors Neighbor distance list
     * @return Density
     */
    protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) {
      final double bandwidth = optimalBandwidth(BitsUtil.cardinality(subspace));

      double density = 0;
      for (DoubleDBIDListIter neighbor = neighbors.iter(); neighbor.valid(); neighbor.advance()) {
        double v = neighbor.doubleValue() / bandwidth;
        if (v < 1) {
          density += 1 - (v * v);
        }
      }

      return density / relation.size();
    }
Example #6
0
 /**
  * Refine neighbors within a subset.
  *
  * @param neighc Neighbor candidates
  * @param dbid Query object
  * @param df distance function
  * @param adjustedEps Epsilon range
  * @param kernel Kernel
  * @return Neighbors of neighbor object
  */
 private DoubleDBIDList subsetNeighborhoodQuery(
     DoubleDBIDList neighc,
     DBIDRef dbid,
     PrimitiveDistanceFunction<? super V> df,
     double adjustedEps,
     KernelDensityEstimator kernel) {
   ModifiableDoubleDBIDList n = DBIDUtil.newDistanceDBIDList(neighc.size());
   V query = kernel.relation.get(dbid);
   for (DoubleDBIDListIter neighbor = neighc.iter(); neighbor.valid(); neighbor.advance()) {
     DoubleDBIDPair p = neighbor.getPair();
     double dist = df.distance(query, kernel.relation.get(p));
     if (dist <= adjustedEps) {
       n.add(dist, p);
     }
   }
   return n;
 }
Example #7
0
  /**
   * Main loop of OUTRES. Run for each object
   *
   * @param s start dimension
   * @param subspace Current subspace
   * @param id Current object ID
   * @param kernel Kernel
   * @return Score
   */
  public double outresScore(
      final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
    double score = 1.0; // Initial score is 1.0
    final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
    MeanVariance meanv = new MeanVariance();

    for (int i = s; i < kernel.dim; i++) {
      if (BitsUtil.get(subspace, i)) { // TODO: needed? Or should we always start
        // with i=0?
        continue;
      }
      BitsUtil.setI(subspace, i);
      df.setSelectedDimensions(subspace);
      final double adjustedEps = kernel.adjustedEps(kernel.dim);
      // Query with a larger window, to also get neighbors of neighbors
      // Subspace euclidean is metric!
      final double range = adjustedEps * 2.;
      RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);

      DoubleDBIDList neighc = rq.getRangeForDBID(id, range);
      DoubleDBIDList neigh = refineRange(neighc, adjustedEps);
      if (neigh.size() > 2) {
        // Relevance test
        if (relevantSubspace(subspace, neigh, kernel)) {
          final double density = kernel.subspaceDensity(subspace, neigh);
          // Compute mean and standard deviation for densities of neighbors.
          meanv.reset();
          for (DoubleDBIDListIter neighbor = neigh.iter(); neighbor.valid(); neighbor.advance()) {
            DoubleDBIDList n2 = subsetNeighborhoodQuery(neighc, neighbor, df, adjustedEps, kernel);
            meanv.put(kernel.subspaceDensity(subspace, n2));
          }
          final double deviation = (meanv.getMean() - density) / (2. * meanv.getSampleStddev());
          // High deviation:
          if (deviation >= 1) {
            score *= (density / deviation);
          }
          // Recursion
          score *= outresScore(i + 1, subspace, id, kernel);
        }
      }
      BitsUtil.clearI(subspace, i);
    }
    return score;
  }
Example #8
0
File: LOCI.java Project: fjfd/elki
  /**
   * Run the algorithm
   *
   * @param database Database to process
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();

    // LOCI preprocessing step
    WritableDataStore<DoubleIntArrayList> interestingDistances =
        DataStoreUtil.makeStorage(
            relation.getDBIDs(),
            DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED,
            DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI =
        LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    WritableDoubleDataStore mdef_norm =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    DoubleMinMax minmax = new DoubleMinMax();

    // Shared instance, to save allocations.
    MeanVariance mv_n_r_alpha = new MeanVariance();

    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      final DoubleIntArrayList cdist = interestingDistances.get(iditer);
      final double maxdist = cdist.getDouble(cdist.size() - 1);
      final int maxneig = cdist.getInt(cdist.size() - 1);

      double maxmdefnorm = 0.0;
      double maxnormr = 0;
      if (maxneig >= nmin) {
        // Compute the largest neighborhood we will need.
        DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
        // TODO: Ensure the result is sorted. This is currently implied.

        // For any critical distance, compute the normalized MDEF score.
        for (int i = 0, size = cdist.size(); i < size; i++) {
          // Only start when minimum size is fulfilled
          if (cdist.getInt(i) < nmin) {
            continue;
          }
          final double r = cdist.getDouble(i);
          final double alpha_r = alpha * r;
          // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
          final int n_alphar = cdist.getInt(cdist.find(alpha_r));
          // compute \hat{n}(p_i, r, \alpha) and the corresponding \simga_{MDEF}
          mv_n_r_alpha.reset();
          for (DoubleDBIDListIter neighbor = maxneighbors.iter();
              neighbor.valid();
              neighbor.advance()) {
            // Stop at radius r
            if (neighbor.doubleValue() > r) {
              break;
            }
            DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
            int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
            mv_n_r_alpha.put(rn_alphar);
          }
          // We only use the average and standard deviation
          final double nhat_r_alpha = mv_n_r_alpha.getMean();
          final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();

          // Redundant divisions by nhat_r_alpha removed.
          final double mdef = nhat_r_alpha - n_alphar;
          final double sigmamdef = sigma_nhat_r_alpha;
          final double mdefnorm = mdef / sigmamdef;

          if (mdefnorm > maxmdefnorm) {
            maxmdefnorm = mdefnorm;
            maxnormr = r;
          }
        }
      } else {
        // FIXME: when nmin was not fulfilled - what is the proper value then?
        maxmdefnorm = Double.POSITIVE_INFINITY;
        maxnormr = maxdist;
      }
      mdef_norm.putDouble(iditer, maxmdefnorm);
      mdef_radius.putDouble(iditer, maxnormr);
      minmax.put(maxmdefnorm);
      LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta =
        new QuotientOutlierScoreMeta(
            minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(
        new MaterializedDoubleRelation(
            "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
  }