예제 #1
0
  /**
   * Buckets persons by the discretized value of one attribute and records, per bucket, the sum of
   * a second attribute together with the number of persons that contributed to it.
   *
   * <p>Persons for which either attribute is {@code null} are ignored. Both attribute values are
   * parsed with {@link Double#parseDouble(String)}, so a non-numeric value propagates a
   * {@link NumberFormatException}. The bucket index is obtained from {@code xDataDiscr.index(...)}.
   *
   * @param persons the persons to aggregate
   * @param sums receives, at each populated bucket index, the sum of the y-attribute values
   * @param counts receives, at each populated bucket index, the number of contributing persons
   * @param xAttrKey key of the attribute whose value selects the bucket
   * @param yAttrKey key of the attribute whose value is summed
   */
  private void calculateBuckets(
      Set<? extends Person> persons,
      DynamicDoubleArray sums,
      DynamicIntArray counts,
      String xAttrKey,
      String yAttrKey) {
    TIntDoubleHashMap sumByBucket = new TIntDoubleHashMap();
    TIntIntHashMap countByBucket = new TIntIntHashMap();

    for (Person p : persons) {
      String rawX = p.getAttribute(xAttrKey);
      String rawY = p.getAttribute(yAttrKey);
      if (rawX == null || rawY == null) {
        // Only persons carrying both attributes take part in the aggregation.
        continue;
      }

      double x = Double.parseDouble(rawX);
      double y = Double.parseDouble(rawY);
      int bucket = xDataDiscr.index(x);

      // First hit on a bucket seeds it with (y, 1); later hits add to the running totals.
      sumByBucket.adjustOrPutValue(bucket, y, y);
      countByBucket.adjustOrPutValue(bucket, 1, 1);
    }

    // Both maps share the same key set, so walking the sums visits every populated bucket.
    TIntDoubleIterator cursor = sumByBucket.iterator();
    while (cursor.hasNext()) {
      cursor.advance();
      int bucket = cursor.key();
      sums.set(bucket, cursor.value());
      counts.set(bucket, countByBucket.get(bucket));
    }
  }
예제 #2
0
  /**
   * Accumulates per-feature totals and document frequencies over {@code instances}.
   *
   * <p>The data type of the first instance decides how the whole list is processed:
   *
   * <ul>
   *   <li>{@code FeatureSequence}: occurrences of each feature index are tallied per instance;
   *       the tally is added to {@code featureCounts} and {@code documentFrequencies} is
   *       incremented once for every distinct feature in that instance.
   *   <li>{@code FeatureVector}: for every location, the stored value is added to
   *       {@code featureCounts} and {@code documentFrequencies} is incremented.
   *   <li>Anything else: a message naming the unsupported class is logged and nothing is counted.
   * </ul>
   *
   * <p>An empty instance list is logged and left untouched. A running instance count is printed
   * to {@code System.err} after every 1000 instances.
   */
  public void count() {

    if (instances.size() == 0) {
      logger.info("Instance list is empty");
      return;
    }

    int index = 0;

    if (instances.get(0).getData() instanceof FeatureSequence) {

      for (Instance instance : instances) {
        FeatureSequence features = (FeatureSequence) instance.getData();

        // Fresh tally per instance so that document frequency counts each feature once.
        TIntIntHashMap docCounts = new TIntIntHashMap();
        for (int i = 0; i < features.getLength(); i++) {
          docCounts.adjustOrPutValue(features.getIndexAtPosition(i), 1, 1);
        }

        // Visit every distinct feature of this instance. The previous bound of
        // keys.length - 1 silently dropped the last key from both statistics.
        for (int feature : docCounts.keys()) {
          featureCounts[feature] += docCounts.get(feature);
          documentFrequencies[feature]++;
        }

        index++;
        if (index % 1000 == 0) {
          System.err.println(index);
        }
      }
    } else if (instances.get(0).getData() instanceof FeatureVector) {

      for (Instance instance : instances) {
        FeatureVector features = (FeatureVector) instance.getData();

        for (int location = 0; location < features.numLocations(); location++) {
          int feature = features.indexAtLocation(location);
          double value = features.valueAtLocation(location);

          documentFrequencies[feature]++;
          featureCounts[feature] += value;
        }

        index++;
        if (index % 1000 == 0) {
          System.err.println(index);
        }
      }
    } else {
      logger.info("Unsupported data class: " + instances.get(0).getData().getClass().getName());
    }
  }