Exemplo n.º 1
0
  /**
   * Compute the k nearest neighbors of every object and write them to the cache file.
   *
   * <p>File layout, as produced below: the {@code KNN_CACHE_MAGIC} integer, followed by one record
   * per object. A record consists of the object's internal index and the neighbor count (both as
   * unsigned varints), then for each neighbor its internal index (unsigned varint) and its
   * distance (8 byte double).
   *
   * <p>The file is exclusively locked while it is written. An {@link IOException} is logged, not
   * rethrown.
   *
   * @throws AbortException if the number of neighbors written differs from the reported list size
   */
  @Override
  public void run() {
    Database database = input.getDatabase();
    Relation<O> relation = database.getRelation(distance.getInputTypeRestriction());
    DistanceQuery<O> distanceQuery = database.getDistanceQuery(relation, distance);
    KNNQuery<O> knnQ = database.getKNNQuery(distanceQuery, DatabaseQuery.HINT_HEAVY_USE);

    // Open the file, its channel, and acquire a write lock. try-with-resources closes them in
    // reverse order, so the lock is released before the channel and the file are closed, also
    // when an exception is thrown.
    try (RandomAccessFile file = new RandomAccessFile(out, "rw");
        FileChannel channel = file.getChannel();
        FileLock lock = channel.lock()) {
      // Mode "rw" does not truncate: discard old contents, so that a previously longer file
      // cannot leave stale records behind the new data.
      file.setLength(0);
      // write magic header
      file.writeInt(KNN_CACHE_MAGIC);

      // Initial size, enough for 2 kNN.
      // NOTE(review): the budget of 12 bytes per neighbor assumes a varint-encoded index takes at
      // most 4 bytes - confirm against ByteArrayUtil.writeUnsignedVarint.
      int bufsize = k * 12 * 2 + 10;
      ByteBuffer buffer = ByteBuffer.allocateDirect(bufsize);

      FiniteProgress prog =
          LOG.isVerbose() ? new FiniteProgress("Computing kNN", relation.size(), LOG) : null;

      for (DBIDIter it = relation.iterDBIDs(); it.valid(); it.advance()) {
        final KNNList nn = knnQ.getKNNForDBID(it, k);
        final int nnsize = nn.size();

        // Grow the buffer when needed (e.g. more than k results because of ties):
        if (nnsize * 12 + 10 > bufsize) {
          while (nnsize * 12 + 10 > bufsize) {
            bufsize <<= 1;
          }
          buffer = ByteBuffer.allocateDirect(bufsize);
        }

        buffer.clear();
        ByteArrayUtil.writeUnsignedVarint(buffer, it.internalGetIndex());
        ByteArrayUtil.writeUnsignedVarint(buffer, nnsize);
        int c = 0;
        for (DoubleDBIDListIter ni = nn.iter(); ni.valid(); ni.advance(), c++) {
          ByteArrayUtil.writeUnsignedVarint(buffer, ni.internalGetIndex());
          buffer.putDouble(ni.doubleValue());
        }
        // The record header already announced nnsize entries; a mismatch would corrupt the file.
        if (c != nnsize) {
          throw new AbortException("Sizes did not agree. Cache is invalid.");
        }

        buffer.flip();
        // A single write() call is allowed to write only a part of the buffer.
        while (buffer.hasRemaining()) {
          channel.write(buffer);
        }
        LOG.incrementProcessed(prog);
      }
      LOG.ensureCompleted(prog);
    } catch (IOException e) {
      LOG.exception(e);
    }
  }
Exemplo n.º 2
0
 /**
  * Restrict a range query result to a smaller radius.
  *
  * <p>All entries of the input are inspected, because the input is not assumed to be sorted.
  *
  * @param neighc Original result
  * @param adjustedEps New epsilon
  * @return new list holding the entries whose stored distance is at most {@code adjustedEps}
  */
 private DoubleDBIDList refineRange(DoubleDBIDList neighc, double adjustedEps) {
   final ModifiableDoubleDBIDList refined = DBIDUtil.newDistanceDBIDList(neighc.size());
   DoubleDBIDListIter cursor = neighc.iter();
   while (cursor.valid()) {
     final DoubleDBIDPair entry = cursor.getPair();
     final double stored = entry.doubleValue();
     // Keep the entry, with its already known distance, when it is inside the new radius.
     if (stored <= adjustedEps) {
       refined.add(stored, entry);
     }
     cursor.advance();
   }
   return refined;
 }
Exemplo n.º 3
0
 /**
  * Merge the densities seen from the current reference point into the density storage.
  *
  * <p>For every position of the distance list, the density is computed and written to the
  * storage unless it is larger than the value already stored there.
  *
  * @param rbod_score Density storage
  * @param referenceDists Distances from current reference point
  */
 protected void updateDensities(
     WritableDoubleDataStore rbod_score, DoubleDBIDList referenceDists) {
   final DoubleDBIDListIter cursor = referenceDists.iter();
   int pos = 0;
   while (pos < referenceDists.size()) {
     final double candidate = computeDensity(referenceDists, cursor, pos);
     // computeDensity moved the shared iterator around; point it at the current object again.
     cursor.seek(pos);
     // Deliberately a negated "greater than": the comparison is false when the stored value is
     // NaN (which indicates the first run), so the candidate is stored in that case, too.
     final boolean candidateIsLarger = candidate > rbod_score.doubleValue(cursor);
     if (!candidateIsLarger) {
       rbod_score.putDouble(cursor, candidate);
     }
     pos++;
   }
 }
Exemplo n.º 4
0
    /**
     * Compute the kernel density in the given subspace.
     *
     * <p>Each neighbor distance is divided by the bandwidth chosen for the dimensionality of the
     * subspace. Neighbors with a scaled distance below 1 contribute {@code 1 - v * v}; the sum is
     * divided by the size of the relation.
     *
     * @param subspace Subspace
     * @param neighbors Neighbor distance list
     * @return Density
     */
    protected double subspaceDensity(long[] subspace, DoubleDBIDList neighbors) {
      final double h = optimalBandwidth(BitsUtil.cardinality(subspace));

      double total = 0;
      DoubleDBIDListIter cursor = neighbors.iter();
      while (cursor.valid()) {
        final double scaled = cursor.doubleValue() / h;
        // Only neighbors within one bandwidth contribute.
        if (scaled < 1) {
          total += 1 - (scaled * scaled);
        }
        cursor.advance();
      }

      return total / relation.size();
    }
 /**
  * Render the list as {@code kNNList[distance:id,distance:id,...]}.
  *
  * @return textual representation of all entries, in iteration order
  */
 @Override
 public String toString() {
   final StringBuilder text = new StringBuilder();
   text.append("kNNList[");
   boolean first = true;
   for (DoubleDBIDListIter entry = this.iter(); entry.valid(); entry.advance()) {
     // Separator goes in front of every entry except the first one.
     if (!first) {
       text.append(',');
     }
     first = false;
     text.append(entry.doubleValue()).append(':').append(DBIDUtil.toString(entry));
   }
   text.append(']');
   return text.toString();
 }
Exemplo n.º 6
0
 /**
  * Find the neighbors of an object within a set of candidates.
  *
  * <p>The distance of every candidate to the query object is computed with the given distance
  * function; candidates within {@code adjustedEps} are returned together with that distance.
  *
  * @param neighc Neighbor candidates
  * @param dbid Query object
  * @param df distance function
  * @param adjustedEps Epsilon range
  * @param kernel Kernel
  * @return Neighbors of neighbor object
  */
 private DoubleDBIDList subsetNeighborhoodQuery(
     DoubleDBIDList neighc,
     DBIDRef dbid,
     PrimitiveDistanceFunction<? super V> df,
     double adjustedEps,
     KernelDensityEstimator kernel) {
   final ModifiableDoubleDBIDList result = DBIDUtil.newDistanceDBIDList(neighc.size());
   final V queryObject = kernel.relation.get(dbid);
   DoubleDBIDListIter cursor = neighc.iter();
   while (cursor.valid()) {
     final DoubleDBIDPair candidate = cursor.getPair();
     // The distance stored in the candidate list refers to a different object: recompute.
     final double d = df.distance(queryObject, kernel.relation.get(candidate));
     if (d <= adjustedEps) {
       result.add(d, candidate);
     }
     cursor.advance();
   }
   return result;
 }
Exemplo n.º 7
0
  /**
   * Main loop of OUTRES. Run for each object.
   *
   * <p>Starting at dimension {@code s}, each dimension that is not yet part of the subspace is
   * added in turn. If the object has more than two neighbors in the extended subspace and the
   * subspace passes the relevance test, the score is updated and the method recurses with the
   * next start dimension. The dimension is removed again before the next one is tried, so
   * {@code subspace} has its original contents when the method returns.
   *
   * @param s start dimension
   * @param subspace Current subspace
   * @param id Current object ID
   * @param kernel Kernel
   * @return Score
   */
  public double outresScore(
      final int s, long[] subspace, DBIDRef id, KernelDensityEstimator kernel) {
    final SubspaceEuclideanDistanceFunction df = new SubspaceEuclideanDistanceFunction(subspace);
    final MeanVariance neighborDensities = new MeanVariance();
    double result = 1.0; // Initial score is 1.0

    for (int dim = s; dim < kernel.dim; dim++) {
      // TODO: needed? Or should we always start with i=0?
      if (BitsUtil.get(subspace, dim)) {
        continue;
      }
      BitsUtil.setI(subspace, dim);
      df.setSelectedDimensions(subspace);
      // NOTE(review): the radius is derived from the full dimensionality kernel.dim, not from
      // the cardinality of the current subspace - confirm this is intended.
      final double adjustedEps = kernel.adjustedEps(kernel.dim);
      // Query with a larger window, to also get neighbors of neighbors
      // Subspace euclidean is metric!
      final double range = adjustedEps * 2.;
      final RangeQuery<V> rq = QueryUtil.getRangeQuery(kernel.relation, df, range);

      final DoubleDBIDList candidates = rq.getRangeForDBID(id, range);
      final DoubleDBIDList neigh = refineRange(candidates, adjustedEps);
      // Size check first; the relevance test is only run when there are enough neighbors.
      if (neigh.size() > 2 && relevantSubspace(subspace, neigh, kernel)) {
        final double density = kernel.subspaceDensity(subspace, neigh);
        // Compute mean and standard deviation for densities of neighbors.
        neighborDensities.reset();
        DoubleDBIDListIter member = neigh.iter();
        while (member.valid()) {
          final DoubleDBIDList memberNeighbors =
              subsetNeighborhoodQuery(candidates, member, df, adjustedEps, kernel);
          neighborDensities.put(kernel.subspaceDensity(subspace, memberNeighbors));
          member.advance();
        }
        final double deviation =
            (neighborDensities.getMean() - density) / (2. * neighborDensities.getSampleStddev());
        // High deviation:
        if (deviation >= 1) {
          result *= (density / deviation);
        }
        // Recursion
        result *= outresScore(dim + 1, subspace, id, kernel);
      }
      BitsUtil.clearI(subspace, dim);
    }
    return result;
  }
Exemplo n.º 8
0
Arquivo: LOCI.java Projeto: fjfd/elki
  /**
   * Preprocessing step: determine the radii of interest for each point.
   *
   * <p>For every object, the neighbors within {@code rmax} are queried. Each distinct neighbor
   * distance becomes a critical radius, stored together with the number of neighbors up to that
   * distance. Unless {@code alpha} is 1, the radius {@code distance / alpha} is added as well
   * (if it does not exceed {@code rmax}); its neighbor count is filled in from the preceding
   * entry after sorting.
   *
   * @param ids IDs to process
   * @param rangeQuery Range query
   * @param interestingDistances Distances of interest
   */
  protected void precomputeInterestingRadii(
      DBIDs ids,
      RangeQuery<O> rangeQuery,
      WritableDataStore<DoubleIntArrayList> interestingDistances) {
    FiniteProgress progressPreproc =
        LOG.isVerbose() ? new FiniteProgress("LOCI preprocessing", ids.size(), LOG) : null;
    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      final DoubleDBIDList neighbors = rangeQuery.getRangeForDBID(iditer, rmax);
      // build list of critical distances
      final DoubleIntArrayList cdist = new DoubleIntArrayList(neighbors.size() << 1);
      int seen = 0;
      for (DoubleDBIDListIter ni = neighbors.iter(); ni.valid(); ) {
        final double curdist = ni.doubleValue();
        ++seen;
        // Advance first, so that the next distance can be inspected.
        ni.advance();
        // Tied to the next object: the radius is recorded once, at the last tied object.
        final boolean tied = ni.valid() && curdist == ni.doubleValue();
        if (!tied) {
          cdist.append(curdist, seen);
          // Scale radius, and reinsert; the neighbor count is not known yet.
          if (alpha != 1.) {
            final double ri = curdist / alpha;
            if (ri <= rmax) {
              cdist.append(ri, Integer.MIN_VALUE);
            }
          }
        }
      }
      cdist.sort();

      // fill the gaps to have fast lookups of number of neighbors at a given
      // distance.
      int known = 0;
      final int entries = cdist.size();
      for (int pos = 0; pos < entries; pos++) {
        final int count = cdist.getInt(pos);
        if (count != Integer.MIN_VALUE) {
          known = count;
          continue;
        }
        // Placeholder entry: inherit the count of the closest preceding real entry.
        cdist.setValue(pos, known);
      }
      // TODO: shrink the list, removing duplicate radii?

      interestingDistances.put(iditer, cdist);
      LOG.incrementProcessed(progressPreproc);
    }
    LOG.ensureCompleted(progressPreproc);
  }
Exemplo n.º 9
0
  /**
   * Computes the density of an object with respect to one reference point.
   *
   * <p>The neighbors are approximated: instead of a real nearest neighbor search, the neighbors
   * of an object are the objects next to it in the reference distance list, i.e. those with the
   * most similar distance to the reference point. Starting at the object, the list is expanded
   * to the left and to the right, always taking the side with the smaller difference in
   * reference distance, until k differences have been summed up. The result is k divided by
   * that sum.
   *
   * @param referenceDists vector of the reference distances
   * @param iter Iterator to this list (will be reused)
   * @param index index of the current object
   * @return density for one object and reference point
   * @throws IndexOutOfBoundsException if the list runs out of objects on both sides before k
   *     differences were collected
   */
  protected double computeDensity(
      DoubleDBIDList referenceDists, DoubleDBIDListIter iter, int index) {
    final int size = referenceDists.size();
    final double xDist = iter.seek(index).doubleValue();

    // Positions of the next unused object on either side, and their gap to the current object.
    // A side that has run out of objects gets an infinite gap.
    int left = index - 1;
    int right = index + 1;
    double leftGap =
        (left >= 0) ? xDist - iter.seek(left).doubleValue() : Double.POSITIVE_INFINITY;
    double rightGap =
        (right < size) ? iter.seek(right).doubleValue() - xDist : Double.POSITIVE_INFINITY;

    double total = 0.;
    for (int taken = 0; taken < k; ++taken) {
      final boolean hasLeft = left >= 0;
      final boolean hasRight = right < size;
      if (!hasLeft && !hasRight) {
        // Not enough objects in database?
        throw new IndexOutOfBoundsException("Less than k objects?");
      }
      // Left is chosen when it is the only side available, or when it is strictly closer.
      final boolean takeLeft = hasLeft && (!hasRight || leftGap < rightGap);
      if (takeLeft) {
        total += leftGap;
        --left;
        leftGap = (left >= 0) ? xDist - iter.seek(left).doubleValue() : Double.POSITIVE_INFINITY;
      } else {
        total += rightGap;
        ++right;
        rightGap =
            (right < size) ? iter.seek(right).doubleValue() - xDist : Double.POSITIVE_INFINITY;
      }
    }
    return k / total;
  }
Exemplo n.º 10
0
Arquivo: LOCI.java Projeto: fjfd/elki
  /**
   * Run the algorithm.
   *
   * <p>First, the critical radii of every object are precomputed. Then, for every object and
   * every critical radius with at least {@code nmin} neighbors, the normalized MDEF value is
   * computed; the largest value and the radius where it occurred are stored per object. The
   * result contains the normalized MDEF values as outlier scores, with the radii attached as a
   * child result.
   *
   * @param database Database to process
   * @param relation Relation to process
   * @return Outlier result
   */
  public OutlierResult run(Database database, Relation<O> relation) {
    DistanceQuery<O> distFunc = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distFunc);
    DBIDs ids = relation.getDBIDs();

    // LOCI preprocessing step
    WritableDataStore<DoubleIntArrayList> interestingDistances =
        DataStoreUtil.makeStorage(
            relation.getDBIDs(),
            DataStoreFactory.HINT_TEMP | DataStoreFactory.HINT_SORTED,
            DoubleIntArrayList.class);
    precomputeInterestingRadii(ids, rangeQuery, interestingDistances);
    // LOCI main step
    FiniteProgress progressLOCI =
        LOG.isVerbose() ? new FiniteProgress("LOCI scores", relation.size(), LOG) : null;
    // Per object: the largest normalized MDEF, and the radius at which it was observed.
    WritableDoubleDataStore mdef_norm =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    WritableDoubleDataStore mdef_radius =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
    // Tracks the range of the scores, for the result meta data.
    DoubleMinMax minmax = new DoubleMinMax();

    // Shared instance, to save allocations.
    MeanVariance mv_n_r_alpha = new MeanVariance();

    for (DBIDIter iditer = ids.iter(); iditer.valid(); iditer.advance()) {
      final DoubleIntArrayList cdist = interestingDistances.get(iditer);
      // The last entry holds the largest critical radius and its neighbor count.
      // NOTE(review): assumes cdist is never empty - presumably the query object is always
      // contained in its own range query result; confirm.
      final double maxdist = cdist.getDouble(cdist.size() - 1);
      final int maxneig = cdist.getInt(cdist.size() - 1);

      double maxmdefnorm = 0.0;
      double maxnormr = 0;
      if (maxneig >= nmin) {
        // Compute the largest neighborhood we will need.
        DoubleDBIDList maxneighbors = rangeQuery.getRangeForDBID(iditer, maxdist);
        // TODO: Ensure the result is sorted. This is currently implied.

        // For any critical distance, compute the normalized MDEF score.
        for (int i = 0, size = cdist.size(); i < size; i++) {
          // Only start when minimum size is fulfilled
          if (cdist.getInt(i) < nmin) {
            continue;
          }
          final double r = cdist.getDouble(i);
          final double alpha_r = alpha * r;
          // compute n(p_i, \alpha * r) from list (note: alpha_r is not cdist!)
          // NOTE(review): presumably find() yields the index of the entry that applies to the
          // radius alpha_r - verify against DoubleIntArrayList.
          final int n_alphar = cdist.getInt(cdist.find(alpha_r));
          // compute \hat{n}(p_i, r, \alpha) and the corresponding \sigma_{MDEF}
          mv_n_r_alpha.reset();
          for (DoubleDBIDListIter neighbor = maxneighbors.iter();
              neighbor.valid();
              neighbor.advance()) {
            // Stop at radius r (relies on the neighbors being sorted by distance, see above)
            if (neighbor.doubleValue() > r) {
              break;
            }
            // Neighbor count of this neighbor at radius alpha_r, from its own precomputed list.
            DoubleIntArrayList cdist2 = interestingDistances.get(neighbor);
            int rn_alphar = cdist2.getInt(cdist2.find(alpha_r));
            mv_n_r_alpha.put(rn_alphar);
          }
          // We only use the average and standard deviation
          final double nhat_r_alpha = mv_n_r_alpha.getMean();
          final double sigma_nhat_r_alpha = mv_n_r_alpha.getNaiveStddev();

          // Redundant divisions by nhat_r_alpha removed.
          final double mdef = nhat_r_alpha - n_alphar;
          final double sigmamdef = sigma_nhat_r_alpha;
          final double mdefnorm = mdef / sigmamdef;

          // Keep the maximum over all radii. A NaN value (e.g. from 0 / 0) fails this
          // comparison and is therefore ignored.
          if (mdefnorm > maxmdefnorm) {
            maxmdefnorm = mdefnorm;
            maxnormr = r;
          }
        }
      } else {
        // FIXME: when nmin was not fulfilled - what is the proper value then?
        // Currently, such objects get an infinite score at their largest radius.
        maxmdefnorm = Double.POSITIVE_INFINITY;
        maxnormr = maxdist;
      }
      mdef_norm.putDouble(iditer, maxmdefnorm);
      mdef_radius.putDouble(iditer, maxnormr);
      minmax.put(maxmdefnorm);
      LOG.incrementProcessed(progressLOCI);
    }
    LOG.ensureCompleted(progressLOCI);
    // Wrap the scores into the result; the radii are attached as a child result.
    DoubleRelation scoreResult =
        new MaterializedDoubleRelation(
            "LOCI normalized MDEF", "loci-mdef-outlier", mdef_norm, relation.getDBIDs());
    OutlierScoreMeta scoreMeta =
        new QuotientOutlierScoreMeta(
            minmax.getMin(), minmax.getMax(), 0.0, Double.POSITIVE_INFINITY, 0.0);
    OutlierResult result = new OutlierResult(scoreMeta, scoreResult);
    result.addChildResult(
        new MaterializedDoubleRelation(
            "LOCI MDEF Radius", "loci-critical-radius", mdef_radius, relation.getDBIDs()));
    return result;
  }