コード例 #1
0
  /**
   * Fills the per-class value histograms of this attribute with counts read from the data source.
   *
   * <p>If {@code mValueHistograms} has not been created yet, it is initialised with one empty map
   * per class label of {@code targetAttribute}. Then, for every (class label index, attribute value
   * index) pair, the key taken from {@code mCut} at the value index is looked up in the histogram
   * of that class; when the key is absent, a Bernoulli sufficient statistic is requested from
   * {@code source} and stored under that key. Keys that are already present are neither re-queried
   * nor overwritten.
   *
   * @param source data source that answers the sufficient-statistic queries
   * @param desc descriptor supplying the target type used to build each query
   * @param targetAttribute attribute whose domain size gives the number of class labels
   * @throws RDFDatabaseException presumably when a query against {@code source} fails
   */
  private void computeValueHistograms(
      RDFDataSource source, RDFDataDescriptor desc, RbcAttribute targetAttribute)
      throws RDFDatabaseException {
    int classLabelCount = targetAttribute.getDomainSize();
    int attributeValueCount = mAttribute.getDomainSize();

    // First use: create one empty histogram per class label.
    if (mValueHistograms == null) {
      mValueHistograms = CollectionUtil.makeList();
      for (int created = 0; created < classLabelCount; created++) {
        Map<URI, Double> emptyHistogram = CollectionUtil.makeMap();
        mValueHistograms.add(emptyHistogram);
      }
    }

    for (int classIndex = 0; classIndex < classLabelCount; classIndex++) {
      Map<URI, Double> histogramForClass = mValueHistograms.get(classIndex);

      for (int valueIndex = 0; valueIndex < attributeValueCount; valueIndex++) {
        URI cutValue = mCut.get().get(valueIndex);

        // Only values without a recorded count trigger a query against the source.
        if (!histogramForClass.containsKey(cutValue)) {
          SuffStatQueryParameter query =
              new SuffStatQueryParameter(
                  desc.getTargetType(), targetAttribute, classIndex, mAttribute, valueIndex);
          ISufficentStatistic statistic = source.getBernoulliSufficientStatistic(query);

          // The statistic is read through intValue() and then widened to double for storage.
          double count = statistic.getValue().intValue();
          histogramForClass.put(cutValue, count);
        }
      }
    }
  }
コード例 #2
0
  /**
   * Builds the class histogram and the instance count from the data source.
   *
   * <p>For each class label index of the target attribute, a multinomial sufficient statistic is
   * requested from {@code source}; the resulting values form {@code mClassHistogram}, and
   * {@code mNumInstances} is set to the histogram's sum cast to {@code int}.
   *
   * @param source data source that answers the sufficient-statistic queries
   * @param desc descriptor supplying the target attribute and target type
   * @throws RDFDatabaseException presumably when a query against {@code source} fails
   */
  public void estimateParameters(RDFDataSource source, RDFDataDescriptor desc)
      throws RDFDatabaseException {
    // The class counts are requested explicitly because every attribute may be HISTOGRAM.
    RbcAttribute classAttribute = desc.getTargetAttribute();
    int labelCount = classAttribute.getDomainSize();
    double[] countsPerLabel = new double[labelCount];

    for (int label = 0; label < labelCount; label++) {
      SuffStatQueryParameter classQuery =
          new SuffStatQueryParameter(desc.getTargetType(), classAttribute, label);
      ISufficentStatistic classStatistic = source.getMultinomialSufficientStatistic(classQuery);
      // The statistic is read through intValue() before being stored as a double.
      countsPerLabel[label] = classStatistic.getValue().intValue();
    }

    mClassHistogram = new Histogram(countsPerLabel);
    mNumInstances = (int) mClassHistogram.sum();
  }