/**
 * Writes a value into every bucket of the estimator under test, builds a
 * {@link DenseEstimator} from the resulting bucket array, and asserts that the
 * rebuilt estimator reports the same buckets.
 */
@Test
public void testRoundtrip() {
    Estimator original = getEstimator();

    // Bucket b receives b % 16, so the stored values cycle through 0..15.
    int bucket = 0;
    while (bucket < original.getNumberOfBuckets()) {
        original.setIfGreater(bucket, bucket % 16);
        bucket++;
    }

    DenseEstimator rebuilt = new DenseEstimator(original.buckets());

    assertEquals(original.buckets(), rebuilt.buckets());
}
/** * Recreate a tree node content with the specified data based on the tree node's existing content. * * @param first The start(inclusive) index of the train data used for creating the tree node * content. * @param last The end(exclusive) index of the train data used for creating the tree node content. * @param node The tree node whose content need to be recreated. * @return The recreated tree node content. */ private TreeNodeContent createContent(int first, int last, TreeNode node) { // Compute the total weight and its class distribution of [first // last) prune cases float totalWeight = 0; AttributeDelegate classAttributeDelegate = this.attributeDelegates[dataSet.getClassAttributeIndex()]; float[] totalClassDistri = new float[this.dataSet.getClassCount()]; Arrays.fill(totalClassDistri, 0); for (int i = first; i < last; i++) { int classLabel = classAttributeDelegate.getClassBranch(this.cases[i]); totalClassDistri[classLabel] += this.weight[cases[i]]; } // Find the original classification of the tree node String nodeClassification = node.getContent().getClassification(); String[] classValues = this.dataSet.getClassValues(); int maxClassIndex = indexOf(nodeClassification, classValues); // Find the most probable classification of the prune data on // the current tree node for (int i = 0; i < totalClassDistri.length; i++) { totalWeight += totalClassDistri[i]; if (totalClassDistri[i] > totalClassDistri[maxClassIndex]) maxClassIndex = i; } String classification = classValues[maxClassIndex]; // Estimate the leafError of the tree node with the [first last) // prune data float basicLeafError = totalWeight - totalClassDistri[maxClassIndex]; float extraLeafError = Estimator.getExtraError(totalWeight, basicLeafError); float estimatedLeafError = basicLeafError + extraLeafError; return new TreeNodeContent(totalWeight, totalClassDistri, classification, estimatedLeafError); }
/**
 * Adds a value to this instance.
 *
 * <p>The value is hashed and split into a bucket index and a remaining hash. The value
 * offered to the bucket is the number of trailing zero bits of that remaining hash, plus
 * one. If the current estimator is exactly a {@link SparseEstimator} and either its
 * estimated size has reached that of a {@link DenseEstimator} with the same number of
 * buckets, or the offered value is at least {@code SparseEstimator.MAX_BUCKET_VALUE}, the
 * estimator is first replaced by a {@link DenseEstimator} built from the current buckets.
 *
 * @param value the value to add
 * @return true if the estimation was affected by this addition
 */
public boolean add(long value) {
    BucketAndHash entry = fromHash(computeHash(value), estimator.getNumberOfBuckets());
    int rank = Long.numberOfTrailingZeros(entry.getHash()) + 1;

    if (estimator.getClass() == SparseEstimator.class) {
        // Evaluated in the same order as a short-circuit OR: size check first,
        // bucket-value check only if the size check did not already trigger.
        boolean sizeReachedDense =
                estimator.estimateSizeInBytes() >= DenseEstimator.estimateSizeInBytes(estimator.getNumberOfBuckets());
        if (sizeReachedDense || rank >= SparseEstimator.MAX_BUCKET_VALUE) {
            estimator = new DenseEstimator(estimator.buckets());
        }
    }

    return estimator.setIfGreater(entry.getBucket(), rank);
}
/**
 * Returns the bucket array reported by the underlying estimator.
 *
 * @return the result of {@code estimator.buckets()}
 */
public int[] buckets() {
    int[] current = estimator.buckets();
    return current;
}
/**
 * Returns the number of buckets reported by the underlying estimator.
 *
 * @return the result of {@code estimator.getNumberOfBuckets()}
 */
public int getNumberOfBuckets() {
    int bucketCount = estimator.getNumberOfBuckets();
    return bucketCount;
}
/**
 * Returns the size of this instance in bytes: the underlying estimator's estimated size
 * plus {@code INSTANCE_SIZE}.
 *
 * @return {@code estimator.estimateSizeInBytes() + INSTANCE_SIZE}
 */
public int getSizeInBytes() {
    int estimatorBytes = estimator.estimateSizeInBytes();
    return estimatorBytes + INSTANCE_SIZE;
}
/**
 * Returns the estimate computed by the underlying estimator.
 *
 * @return the result of {@code estimator.estimate()}
 */
public long estimate() {
    long result = estimator.estimate();
    return result;
}