private void loadCache(DistanceParser parser, File matrixfile) throws IOException {
    InputStream in =
        new BufferedInputStream(FileUtil.tryGzipInput(new FileInputStream(matrixfile)));
    cache =
        new TLongDoubleHashMap(
            Constants.DEFAULT_CAPACITY,
            Constants.DEFAULT_LOAD_FACTOR,
            -1L,
            Double.POSITIVE_INFINITY);
    min = Integer.MAX_VALUE;
    max = Integer.MIN_VALUE;
    parser.parse(
        in,
        new DistanceCacheWriter() {
          @Override
          public void put(int id1, int id2, double distance) {
            if (id1 < id2) {
              min = id1 < min ? id1 : min;
              max = id2 > max ? id2 : max;
            } else {
              min = id2 < min ? id2 : min;
              max = id1 > max ? id1 : max;
            }
            cache.put(makeKey(id1, id2), distance);
          }

          @Override
          public boolean containsKey(int id1, int id2) {
            return cache.containsKey(makeKey(id1, id2));
          }
        });
    if (min != 0) {
      LOG.verbose(
          "Distance matrix is supposed to be 0-indexed. Choosing offset "
              + min
              + " to compensate.");
    }
  }
Ejemplo n.º 2
0
  /**
   * Run the algorithm.
   *
   * @param database Database to use
   * @param relation Relation to use
   * @return Result
   */
  public OutlierResult run(Database database, Relation<?> relation) {
    WritableDoubleDataStore scores =
        DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);

    DoubleMinMax minmax = new DoubleMinMax();

    try (InputStream in = FileUtil.tryGzipInput(new FileInputStream(file)); //
        TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
      Tokenizer tokenizer = reader.getTokenizer();
      CharSequence buf = reader.getBuffer();
      Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
      reader.reset(in);
      while (reader.nextLineExceptComments()) {
        Integer id = null;
        double score = Double.NaN;
        for (
        /* initialized by nextLineExceptComments */ ; tokenizer.valid(); tokenizer.advance()) {
          mi.region(tokenizer.getStart(), tokenizer.getEnd());
          ms.region(tokenizer.getStart(), tokenizer.getEnd());
          final boolean mif = mi.find();
          final boolean msf = ms.find();
          if (mif && msf) {
            throw new AbortException(
                "ID pattern and score pattern both match value: " + tokenizer.getSubstring());
          }
          if (mif) {
            if (id != null) {
              throw new AbortException(
                  "ID pattern matched twice: previous value "
                      + id
                      + " second value: "
                      + tokenizer.getSubstring());
            }
            id = Integer.parseInt(buf.subSequence(mi.end(), tokenizer.getEnd()).toString());
          }
          if (msf) {
            if (!Double.isNaN(score)) {
              throw new AbortException(
                  "Score pattern matched twice: previous value "
                      + score
                      + " second value: "
                      + tokenizer.getSubstring());
            }
            score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
          }
        }
        if (id != null && !Double.isNaN(score)) {
          scores.putDouble(DBIDUtil.importInteger(id), score);
          minmax.put(score);
        } else if (id == null && Double.isNaN(score)) {
          LOG.warning(
              "Line did not match either ID nor score nor comment: " + reader.getLineNumber());
        } else {
          throw new AbortException(
              "Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
        }
      }
    } catch (IOException e) {
      throw new AbortException(
          "Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
    }

    OutlierScoreMeta meta;
    if (inverted) {
      meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    } else {
      meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
    }
    DoubleRelation scoresult =
        new MaterializedDoubleRelation(
            "External Outlier", "external-outlier", scores, relation.getDBIDs());
    OutlierResult or = new OutlierResult(meta, scoresult);

    // Apply scaling
    if (scaling instanceof OutlierScalingFunction) {
      ((OutlierScalingFunction) scaling).prepare(or);
    }
    DoubleMinMax mm = new DoubleMinMax();
    for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
      double val = scoresult.doubleValue(iditer);
      val = scaling.getScaled(val);
      scores.putDouble(iditer, val);
      mm.put(val);
    }
    meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
    or = new OutlierResult(meta, scoresult);
    return or;
  }