Example #1
  /**
   * Returns a frequency distribution for the values in the given column. The order for string data
   * items is derived from the provided hierarchy.
   *
   * @param column The column
   * @param hierarchy The hierarchy, may be null
   * @return The frequency distribution over the distinct values of the column
   */
  public StatisticsFrequencyDistribution getFrequencyDistribution(int column, Hierarchy hierarchy) {

    // Reset stop flag
    interrupt = false;

    // Init
    String[] values = getDistinctValuesOrdered(column, hierarchy);
    double[] frequencies = new double[values.length];

    // Create map of indexes
    Map<String, Integer> indexes = new HashMap<String, Integer>();
    for (int i = 0; i < values.length; i++) {
      checkInterrupt();
      indexes.put(values[i], i);
    }

    // Count frequencies
    int count = handle.getNumRows();
    for (int row = 0; row < count; row++) {
      checkInterrupt();
      String value = handle.getValue(row, column);
      frequencies[indexes.get(value)]++;
    }

    // Normalize counts to relative frequencies
    for (int i = 0; i < frequencies.length; i++) {
      checkInterrupt();
      frequencies[i] /= (double) count;
    }

    // Return
    return new StatisticsFrequencyDistribution(values, frequencies, count);
  }
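A minimal usage sketch for this method, under the assumption that it is exposed through a statistics builder reachable via DataHandle#getStatistics() and that StatisticsFrequencyDistribution exposes its constructor arguments as public fields named values, frequency and count; these names are assumptions for illustration, not confirmed by the code above.

  // Hypothetical caller: print the relative frequency of each distinct value
  // in the given column. Passing null as the hierarchy falls back to the
  // natural value order, as in the method above.
  public void printFrequencyDistribution(DataHandle handle, int column) {
    StatisticsFrequencyDistribution distribution =
        handle.getStatistics().getFrequencyDistribution(column, null);
    for (int i = 0; i < distribution.values.length; i++) {
      System.out.println(distribution.values[i] + " -> " + distribution.frequency[i]);
    }
  }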
Example #2
  /**
   * Returns summary statistics for all attributes.
   *
   * @param listwiseDeletion A flag enabling list-wise deletion: if set, rows containing a
   *     suppressed or missing value in any column are excluded from all statistics
   * @return A map from attribute name to summary statistics
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) {

    Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>();
    Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>();
    Map<String, DataScale> scales = new HashMap<String, DataScale>();

    // Detect scales
    for (int col = 0; col < handle.getNumColumns(); col++) {

      // Meta
      String attribute = handle.getAttributeName(col);
      DataType<?> type = handle.getDataType(attribute);

      // Scale
      DataScale scale = type.getDescription().getScale();

      // Try to replace nominal scale with ordinal scale based on base data type
      if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) {
        if (!(handle.getBaseDataType(attribute) instanceof ARXString)
            && getHierarchy(col, true) != null) {
          scale = DataScale.ORDINAL;
        }
      }

      // Store
      scales.put(attribute, scale);
      statistics.put(attribute, new DescriptiveStatistics());
      ordinal.put(
          attribute,
          getSummaryStatisticsOrdinal(
              handle.getGeneralization(attribute),
              handle.getDataType(attribute),
              handle.getBaseDataType(attribute),
              getHierarchy(col, true)));
    }

    // Compute summary statistics
    for (int row = 0; row < handle.getNumRows(); row++) {

      // Check whether we should include this row
      boolean include = true;
      if (listwiseDeletion) {
        if (handle.isSuppressed(row)) {
          include = false;
        } else {
          for (int col = 0; col < handle.getNumColumns(); col++) {
            if (DataType.isNull(handle.getValue(row, col))) {
              include = false;
              break;
            }
          }
        }
      }

      // Check
      checkInterrupt();

      // If so, add the row's values
      if (include) {

        // For each column
        for (int col = 0; col < handle.getNumColumns(); col++) {

          // Meta
          String value = handle.getValue(row, col);
          String attribute = handle.getAttributeName(col);
          DataType<?> type = handle.getDataType(attribute);

          // Analyze
          if (!value.equals(handle.getSuppressionString()) && !DataType.isNull(value)) {
            ordinal.get(attribute).addValue(value);
            if (type instanceof DataTypeWithRatioScale) {
              statistics
                  .get(attribute)
                  .addValue(((DataTypeWithRatioScale) type).toDouble(type.parse(value)));
            }
          }
        }
      }
    }

    // Convert
    Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>();
    for (int col = 0; col < handle.getNumColumns(); col++) {

      // Check
      checkInterrupt();

      // Depending on scale
      String attribute = handle.getAttributeName(col);
      DataScale scale = scales.get(attribute);
      DataType<T> type = (DataType<T>) handle.getDataType(attribute);
      ordinal.get(attribute).analyze();
      if (scale == DataScale.NOMINAL) {
        StatisticsSummaryOrdinal stats = ordinal.get(attribute);
        result.put(
            attribute,
            new StatisticsSummary<T>(
                DataScale.NOMINAL,
                stats.getNumberOfMeasures(),
                stats.getMode(),
                type.parse(stats.getMode())));
      } else if (scale == DataScale.ORDINAL) {
        StatisticsSummaryOrdinal stats = ordinal.get(attribute);
        result.put(
            attribute,
            new StatisticsSummary<T>(
                DataScale.ORDINAL,
                stats.getNumberOfMeasures(),
                stats.getMode(),
                type.parse(stats.getMode()),
                stats.getMedian(),
                type.parse(stats.getMedian()),
                stats.getMin(),
                type.parse(stats.getMin()),
                stats.getMax(),
                type.parse(stats.getMax())));
      } else if (scale == DataScale.INTERVAL) {
        StatisticsSummaryOrdinal stats = ordinal.get(attribute);
        DescriptiveStatistics stats2 = statistics.get(attribute);
        boolean isPeriod = type.getDescription().getWrappedClass() == Date.class;

        // TODO: Something is wrong with commons math's kurtosis
        double kurtosis = stats2.getKurtosis();
        kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
        double range = stats2.getMax() - stats2.getMin();
        double stddev = Math.sqrt(stats2.getVariance());

        result.put(
            attribute,
            new StatisticsSummary<T>(
                DataScale.INTERVAL,
                stats.getNumberOfMeasures(),
                stats.getMode(),
                type.parse(stats.getMode()),
                stats.getMedian(),
                type.parse(stats.getMedian()),
                stats.getMin(),
                type.parse(stats.getMin()),
                stats.getMax(),
                type.parse(stats.getMax()),
                toString(type, stats2.getMean(), false, false),
                toValue(type, stats2.getMean()),
                stats2.getMean(),
                toString(type, stats2.getVariance(), isPeriod, true),
                toValue(type, stats2.getVariance()),
                stats2.getVariance(),
                toString(type, stats2.getPopulationVariance(), isPeriod, true),
                toValue(type, stats2.getPopulationVariance()),
                stats2.getPopulationVariance(),
                toString(type, stddev, isPeriod, false),
                toValue(type, stddev),
                stddev,
                toString(type, range, isPeriod, false),
                toValue(type, range),
                range,
                toString(type, kurtosis, isPeriod, false),
                toValue(type, kurtosis),
                kurtosis));
      } else if (scale == DataScale.RATIO) {
        StatisticsSummaryOrdinal stats = ordinal.get(attribute);
        DescriptiveStatistics stats2 = statistics.get(attribute);

        // TODO: Something is wrong with commons math's kurtosis
        double kurtosis = stats2.getKurtosis();
        kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
        double range = stats2.getMax() - stats2.getMin();
        double stddev = Math.sqrt(stats2.getVariance());

        result.put(
            attribute,
            new StatisticsSummary<T>(
                DataScale.RATIO,
                stats.getNumberOfMeasures(),
                stats.getMode(),
                type.parse(stats.getMode()),
                stats.getMedian(),
                type.parse(stats.getMedian()),
                stats.getMin(),
                type.parse(stats.getMin()),
                stats.getMax(),
                type.parse(stats.getMax()),
                toString(type, stats2.getMean(), false, false),
                toValue(type, stats2.getMean()),
                stats2.getMean(),
                toString(type, stats2.getVariance(), false, false),
                toValue(type, stats2.getVariance()),
                stats2.getVariance(),
                toString(type, stats2.getPopulationVariance(), false, false),
                toValue(type, stats2.getPopulationVariance()),
                stats2.getPopulationVariance(),
                toString(type, stddev, false, false),
                toValue(type, stddev),
                stddev,
                toString(type, range, false, false),
                toValue(type, range),
                range,
                toString(type, kurtosis, false, false),
                toValue(type, kurtosis),
                kurtosis,
                toString(type, stats2.getGeometricMean(), false, false),
                toValue(type, stats2.getGeometricMean()),
                stats2.getGeometricMean()));
      }
    }

    return result;
  }
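A hedged usage sketch: iterate the returned map and print one line per attribute. The accessors getScale() and getModeAsString() on StatisticsSummary are assumptions for illustration; only the constructor arguments are visible in the code above.

  // Hypothetical caller: compute summary statistics with list-wise deletion
  // enabled and print each attribute's scale of measure and mode. The two
  // getters used on StatisticsSummary are assumed names.
  public void printSummaryStatistics(DataHandle handle) {
    Map<String, StatisticsSummary<?>> summaries =
        handle.getStatistics().getSummaryStatistics(true);
    for (Map.Entry<String, StatisticsSummary<?>> entry : summaries.entrySet()) {
      StatisticsSummary<?> summary = entry.getValue();
      System.out.println(
          entry.getKey() + ": scale=" + summary.getScale() + ", mode=" + summary.getModeAsString());
    }
  }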
Example #3
  /**
   * Returns a contingency table for the given columns. The order for string data items is derived
   * from the provided hierarchies.
   *
   * @param column1 The first column
   * @param hierarchy1 The hierarchy for the first column, may be null
   * @param column2 The second column
   * @param hierarchy2 The hierarchy for the second column, may be null
   * @return The contingency table
   */
  public StatisticsContingencyTable getContingencyTable(
      int column1, Hierarchy hierarchy1, int column2, Hierarchy hierarchy2) {

    // Reset stop flag
    interrupt = false;

    // Init
    String[] values1 = getDistinctValuesOrdered(column1, hierarchy1);
    String[] values2 = getDistinctValuesOrdered(column2, hierarchy2);

    // Create maps of indexes
    Map<String, Integer> indexes1 = new HashMap<String, Integer>();
    for (int i = 0; i < values1.length; i++) {
      checkInterrupt();
      indexes1.put(values1[i], i);
    }
    Map<String, Integer> indexes2 = new HashMap<String, Integer>();
    for (int i = 0; i < values2.length; i++) {
      checkInterrupt();
      indexes2.put(values2[i], i);
    }

    // Create entry set
    int max = Integer.MIN_VALUE;
    final Map<Entry, Integer> entries = new HashMap<Entry, Integer>();
    for (int row = 0; row < handle.getNumRows(); row++) {
      checkInterrupt();
      int index1 = indexes1.get(handle.getValue(row, column1));
      int index2 = indexes2.get(handle.getValue(row, column2));
      Entry entry = new Entry(index1, index2);
      Integer previous = entries.get(entry);
      int value = previous != null ? previous + 1 : 1;
      max = Math.max(max, value);
      entries.put(entry, value);
    }

    // Create iterator
    final int count = handle.getNumRows();
    final Iterator<Entry> internal = entries.keySet().iterator();
    final Iterator<Entry> iterator =
        new Iterator<Entry>() {

          private Map<Entry, Integer> _entries = entries;
          private Iterator<Entry> _internal = internal;

          @Override
          public boolean hasNext() {

            if (_internal == null) return false;
            boolean result = _internal.hasNext();

            // Try to release resources as early as possible
            if (!result) {
              _internal = null;
              _entries = null;
            }
            return result;
          }

          @Override
          public Entry next() {
            if (_internal == null) return null;
            Entry e = _internal.next();
            e.frequency = (double) _entries.get(e) / (double) count;
            return e;
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };

    // Return result
    return new StatisticsContingencyTable(
        values1, values2, count, (double) max / (double) count, iterator);
  }
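A usage sketch for the contingency table, again with assumed names: the public fields values1, values2 and iterator on the result are presumed to mirror its constructor arguments, and Entry is assumed to expose its two indexes as value1 and value2; Entry#frequency is populated by the iterator's next(), as shown above.

  // Hypothetical caller: print every non-empty cell of the contingency table
  // for columns 0 and 1, without hierarchies.
  public void printContingencyTable(DataHandle handle) {
    StatisticsContingencyTable table =
        handle.getStatistics().getContingencyTable(0, null, 1, null);
    while (table.iterator.hasNext()) {
      Entry entry = table.iterator.next();
      System.out.println(
          table.values1[entry.value1] + " x " + table.values2[entry.value2]
              + " -> " + entry.frequency);
    }
  }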