private void computeValueHistograms( RDFDataSource source, RDFDataDescriptor desc, RbcAttribute targetAttribute) throws RDFDatabaseException { int numOfClassLabels = targetAttribute.getDomainSize(); int numOfAttributeValues = mAttribute.getDomainSize(); if (mValueHistograms == null) { mValueHistograms = CollectionUtil.makeList(); for (int j = 0; j < numOfClassLabels; j++) { Map<URI, Double> histogram = CollectionUtil.makeMap(); mValueHistograms.add(histogram); } } for (int j = 0; j < numOfClassLabels; j++) { Map<URI, Double> valueHistogram = mValueHistograms.get(j); for (int k = 0; k < numOfAttributeValues; k++) { URI key = mCut.get().get(k); if (valueHistogram.containsKey(key)) continue; SuffStatQueryParameter queryParam = new SuffStatQueryParameter(desc.getTargetType(), targetAttribute, j, mAttribute, k); ISufficentStatistic tempSuffStat = source.getBernoulliSufficientStatistic(queryParam); double valueCount = tempSuffStat.getValue().intValue(); valueHistogram.put(key, valueCount); // System.out.println(queryParam); // System.out.println(tempSuffStat.getValue()); } } }
public void estimateParameters(RDFDataSource source, RDFDataDescriptor desc) throws RDFDatabaseException { // Explicitly ask for class count since every attribute may be HISTOGRAM RbcAttribute targetAttribute = desc.getTargetAttribute(); int numOfClassLabels = targetAttribute.getDomainSize(); double[] classCounts = new double[numOfClassLabels]; for (int j = 0; j < numOfClassLabels; j++) { ISufficentStatistic tempSuffStat = source.getMultinomialSufficientStatistic( new SuffStatQueryParameter(desc.getTargetType(), targetAttribute, j)); classCounts[j] = tempSuffStat.getValue().intValue(); } mClassHistogram = new Histogram(classCounts); mNumInstances = (int) mClassHistogram.sum(); }