Ejemplo n.º 1
0
  /**
   * Feeds one row into the per-column delegates.
   *
   * <p>For every column in {@code _columns}, the row's value for that column is read and passed,
   * together with the row itself and the distinct count, to the delegate registered for that
   * column in {@code _columnDelegates}.
   *
   * @param row the row being analyzed
   * @param distinctCount forwarded unchanged to each column delegate
   */
  @Override
  public void run(InputRow row, int distinctCount) {
    for (final InputColumn<String> inputColumn : _columns) {
      // Read the cell first, then resolve the delegate that accumulates this column's statistics.
      final String cellValue = row.getValue(inputColumn);
      _columnDelegates.get(inputColumn).run(row, cellValue, distinctCount);
    }
  }
Ejemplo n.º 2
0
  /**
   * Assembles the analyzer result as a crosstab of string measures per analyzed column.
   *
   * <p>The measure dimension receives one category per metric. The column dimension receives one
   * category per entry of {@code _columns}, named after the column. Each metric is read from that
   * column's delegate and written into the matching cell; for selected metrics an annotation is
   * attached right after the value is written.
   *
   * @return a {@link StringAnalyzerResult} wrapping the analyzed columns and the populated crosstab
   */
  @Override
  public StringAnalyzerResult getResult() {
    logger.info("getResult()");

    // Register every metric up front, in the order the categories were originally declared.
    final CrosstabDimension measures = new CrosstabDimension(DIMENSION_MEASURES);
    measures.addCategory(MEASURE_ROW_COUNT);
    measures.addCategory(MEASURE_NULL_COUNT);
    measures.addCategory(MEASURE_BLANK_COUNT);
    measures.addCategory(MEASURE_ENTIRELY_UPPERCASE_COUNT);
    measures.addCategory(MEASURE_ENTIRELY_LOWERCASE_COUNT);
    measures.addCategory(MEASURE_TOTAL_CHAR_COUNT);
    measures.addCategory(MEASURE_MAX_CHARS);
    measures.addCategory(MEASURE_MIN_CHARS);
    measures.addCategory(MEASURE_AVG_CHARS);
    measures.addCategory(MEASURE_MAX_WHITE_SPACES);
    measures.addCategory(MEASURE_MIN_WHITE_SPACES);
    measures.addCategory(MEASURE_AVG_WHITE_SPACES);
    measures.addCategory(MEASURE_UPPERCASE_CHARS);
    measures.addCategory(MEASURE_UPPERCASE_CHARS_EXCL_FIRST_LETTERS);
    measures.addCategory(MEASURE_LOWERCASE_CHARS);
    measures.addCategory(MEASURE_DIGIT_CHARS);
    measures.addCategory(MEASURE_DIACRITIC_CHARS);
    measures.addCategory(MEASURE_NON_LETTER_CHARS);
    measures.addCategory(MEASURE_WORD_COUNT);
    measures.addCategory(MEASURE_MAX_WORDS);
    measures.addCategory(MEASURE_MIN_WORDS);

    // Column categories are added lazily, one per analyzed column, inside the loop below.
    final CrosstabDimension columnAxis = new CrosstabDimension(DIMENSION_COLUMN);

    final Crosstab<Number> table = new Crosstab<Number>(Number.class, columnAxis, measures);

    for (final InputColumn<String> inputColumn : _columns) {
      final String name = inputColumn.getName();
      final StringAnalyzerColumnDelegate stats = _columnDelegates.get(inputColumn);
      columnAxis.addCategory(name);

      // Snapshot all statistics collected for this column before touching the crosstab.
      final Integer rowCount = stats.getNumRows();
      final Integer nullCount = stats.getNumNull();
      final Integer blankCount = stats.getNumBlank();
      final Integer allUpperCount = stats.getNumEntirelyUppercase();
      final Integer allLowerCount = stats.getNumEntirelyLowercase();
      final Integer charTotal = stats.getNumChars();
      final Integer charMax = stats.getMaxChars();
      final Integer charMin = stats.getMinChars();
      final Integer wordTotal = stats.getNumWords();
      final Integer wordMax = stats.getMaxWords();
      final Integer wordMin = stats.getMinWords();
      final Integer whitespaceMax = stats.getMaxWhitespace();
      final Integer whitespaceMin = stats.getMinWhitespace();
      final Integer upperTotal = stats.getNumUppercase();
      final Integer upperExclFirstTotal = stats.getNumUppercaseExclFirstLetter();
      final Integer lowerTotal = stats.getNumLowercase();
      final Integer digitTotal = stats.getNumDigit();
      final Integer diacriticTotal = stats.getNumDiacritics();
      final Integer nonLetterTotal = stats.getNumNonLetter();
      final AverageBuilder charAverage = stats.getCharAverageBuilder();
      final AverageBuilder whitespaceAverage = stats.getWhitespaceAverageBuilder();

      // An average is only reported when at least one value was recorded; otherwise it stays null.
      final Double meanChars;
      if (charAverage.getNumValues() > 0) {
        meanChars = charAverage.getAverage();
      } else {
        meanChars = null;
      }
      final Double meanWhitespace;
      if (whitespaceAverage.getNumValues() > 0) {
        meanWhitespace = whitespaceAverage.getAverage();
      } else {
        meanWhitespace = null;
      }

      // NOTE(review): each addAttachment call directly follows the where(...).put(...) of its own
      // measure and receives the same navigator - presumably it relies on the navigator still
      // pointing at that cell, so the statement order below must be preserved. Confirm against
      // CrosstabNavigator before reordering.
      final CrosstabNavigator<Number> cursor = table.where(columnAxis, name);

      // --- row-level counts ---
      cursor.where(measures, MEASURE_ROW_COUNT).put(rowCount);

      cursor.where(measures, MEASURE_NULL_COUNT).put(nullCount);
      if (nullCount > 0) {
        addAttachment(cursor, stats.getNullAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_BLANK_COUNT).put(blankCount);
      if (blankCount > 0) {
        addAttachment(cursor, stats.getBlankAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_ENTIRELY_UPPERCASE_COUNT).put(allUpperCount);
      if (allUpperCount > 0) {
        addAttachment(cursor, stats.getEntirelyUppercaseAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_ENTIRELY_LOWERCASE_COUNT).put(allLowerCount);
      if (allLowerCount > 0) {
        addAttachment(cursor, stats.getEntirelyLowercaseAnnotation(), inputColumn);
      }

      // --- character lengths (min/max are annotated whenever they are non-null) ---
      cursor.where(measures, MEASURE_TOTAL_CHAR_COUNT).put(charTotal);

      cursor.where(measures, MEASURE_MAX_CHARS).put(charMax);
      if (charMax != null) {
        addAttachment(cursor, stats.getMaxCharsAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_MIN_CHARS).put(charMin);
      if (charMin != null) {
        addAttachment(cursor, stats.getMinCharsAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_AVG_CHARS).put(meanChars);

      // --- whitespace ---
      cursor.where(measures, MEASURE_MAX_WHITE_SPACES).put(whitespaceMax);
      if (whitespaceMax != null) {
        addAttachment(cursor, stats.getMaxWhitespaceAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_MIN_WHITE_SPACES).put(whitespaceMin);
      if (whitespaceMin != null) {
        addAttachment(cursor, stats.getMinWhitespaceAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_AVG_WHITE_SPACES).put(meanWhitespace);

      // --- character classes ---
      cursor.where(measures, MEASURE_UPPERCASE_CHARS).put(upperTotal);

      cursor.where(measures, MEASURE_UPPERCASE_CHARS_EXCL_FIRST_LETTERS).put(upperExclFirstTotal);
      if (upperExclFirstTotal > 0) {
        addAttachment(cursor, stats.getUppercaseExclFirstLetterAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_LOWERCASE_CHARS).put(lowerTotal);

      cursor.where(measures, MEASURE_DIGIT_CHARS).put(digitTotal);
      if (digitTotal > 0) {
        addAttachment(cursor, stats.getDigitAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_DIACRITIC_CHARS).put(diacriticTotal);
      if (diacriticTotal > 0) {
        addAttachment(cursor, stats.getDiacriticAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_NON_LETTER_CHARS).put(nonLetterTotal);

      // --- words ---
      cursor.where(measures, MEASURE_WORD_COUNT).put(wordTotal);

      cursor.where(measures, MEASURE_MAX_WORDS).put(wordMax);
      if (wordMax != null) {
        addAttachment(cursor, stats.getMaxWordsAnnotation(), inputColumn);
      }

      cursor.where(measures, MEASURE_MIN_WORDS).put(wordMin);
      if (wordMin != null) {
        addAttachment(cursor, stats.getMinWordsAnnotation(), inputColumn);
      }
    }

    return new StringAnalyzerResult(_columns, table);
  }