示例#1
0
  /**
   * Fills every bucket of an estimator, rebuilds a {@link DenseEstimator} from its buckets, and
   * checks that both report equal buckets.
   */
  @Test
  public void testRoundtrip() {
    Estimator source = getEstimator();

    // Give each bucket a value that cycles through 0..15 with the bucket index.
    int bucket = 0;
    while (bucket < source.getNumberOfBuckets()) {
      source.setIfGreater(bucket, bucket % 16);
      bucket++;
    }

    DenseEstimator rebuilt = new DenseEstimator(source.buckets());

    // NOTE(review): buckets() appears to return an array, so this relies on assertEquals
    // comparing array contents rather than references - confirm for the test framework in use.
    assertEquals(source.buckets(), rebuilt.buckets());
  }
示例#2
0
  /**
   * Builds fresh content for a tree node from the cases in the index range {@code [first, last)},
   * starting from the classification the node currently holds.
   *
   * @param first The start (inclusive) index of the cases used to build the content.
   * @param last The end (exclusive) index of the cases used to build the content.
   * @param node The tree node whose content is being recreated; its current classification is
   *     the initial candidate and is kept unless another class has strictly greater weight.
   * @return The recreated content: total weight, per-class weight distribution, chosen
   *     classification and estimated leaf error.
   */
  private TreeNodeContent createContent(int first, int last, TreeNode node) {
    AttributeDelegate classDelegate = this.attributeDelegates[dataSet.getClassAttributeIndex()];

    // Accumulate the weight of each class over the cases in [first, last).
    // A newly allocated float[] is already zero-filled, so no explicit reset is needed.
    float[] classWeights = new float[this.dataSet.getClassCount()];
    for (int pos = first; pos < last; pos++) {
      int caseIndex = this.cases[pos];
      classWeights[classDelegate.getClassBranch(caseIndex)] += this.weight[caseIndex];
    }

    // The node's existing classification is the starting candidate.
    String currentLabel = node.getContent().getClassification();
    String[] classValues = this.dataSet.getClassValues();
    int bestClass = indexOf(currentLabel, classValues);

    // Sum the overall weight and move to any class that is strictly heavier than the
    // current candidate (ties leave the candidate unchanged).
    float weightSum = 0;
    for (int cls = 0; cls < classWeights.length; cls++) {
      float classWeight = classWeights[cls];
      weightSum += classWeight;
      if (classWeight > classWeights[bestClass]) {
        bestClass = cls;
      }
    }

    String classification = classValues[bestClass];

    // Leaf error = weight not belonging to the chosen class, plus the estimator's extra error.
    float misclassifiedWeight = weightSum - classWeights[bestClass];
    float estimatedLeafError =
        misclassifiedWeight + Estimator.getExtraError(weightSum, misclassifiedWeight);

    return new TreeNodeContent(weightSum, classWeights, classification, estimatedLeafError);
  }
示例#3
0
  /**
   * Adds a value, first replacing a sparse estimator with a dense one when required.
   *
   * @return true if the estimation was affected by this addition
   */
  public boolean add(long value) {
    BucketAndHash hashed = fromHash(computeHash(value), estimator.getNumberOfBuckets());
    int bitPosition = Long.numberOfTrailingZeros(hashed.getHash()) + 1;

    // Only an estimator whose class is exactly SparseEstimator is ever converted.
    if (estimator.getClass() == SparseEstimator.class) {
      // Convert when the sparse form is estimated to be at least as large as a dense one with
      // the same bucket count, or when the new value reaches SparseEstimator.MAX_BUCKET_VALUE.
      boolean sparseNoSmallerThanDense =
          estimator.estimateSizeInBytes()
              >= DenseEstimator.estimateSizeInBytes(estimator.getNumberOfBuckets());
      if (sparseNoSmallerThanDense || bitPosition >= SparseEstimator.MAX_BUCKET_VALUE) {
        estimator = new DenseEstimator(estimator.buckets());
      }
    }

    return estimator.setIfGreater(hashed.getBucket(), bitPosition);
  }
示例#4
0
 /** @return the value of {@code buckets()} on the underlying estimator */
 public int[] buckets() {
   int[] bucketValues = estimator.buckets();
   return bucketValues;
 }
示例#5
0
 /** @return the number of buckets reported by the underlying estimator */
 public int getNumberOfBuckets() {
   int bucketCount = estimator.getNumberOfBuckets();
   return bucketCount;
 }
示例#6
0
 /**
  * @return the underlying estimator's estimated size in bytes plus {@code INSTANCE_SIZE}
  *     (presumably this object's own fixed overhead - confirm against its declaration)
  */
 public int getSizeInBytes() {
   int estimatorBytes = estimator.estimateSizeInBytes();
   return estimatorBytes + INSTANCE_SIZE;
 }
示例#7
0
 /** @return the estimate produced by the underlying estimator */
 public long estimate() {
   long currentEstimate = estimator.estimate();
   return currentEstimate;
 }