/**
   * Locate the header columns that carry ideal (expected output) values.
   *
   * <p>When the configured machine learning type is {@code MLMethodFactory.TYPE_SOM} the method is
   * treated as non-supervised and an empty array is returned. Otherwise every header is resolved,
   * by base name and time slice, to a normalized field of the script; a column is selected when
   * such a field exists and reports itself as an output.
   *
   * @param headerList The headers.
   * @return The indexes of the ideal fields, in header order; empty for a SOM.
   */
  private int[] determineIdealFields(final CSVHeaders headerList) {
    final String methodType = getProp().getPropertyString(ScriptProperties.ML_CONFIG_TYPE);

    // non-supervised training has no ideal columns
    if (methodType.equals(MLMethodFactory.TYPE_SOM)) {
      return new int[0];
    }

    final List<Integer> idealColumns = new ArrayList<Integer>();

    for (int column = 0; column < headerList.size(); column++) {
      final String name = headerList.getBaseHeader(column);
      final int timeSlice = headerList.getSlice(column);
      final AnalystField candidate = getAnalyst().getScript().findNormalizedField(name, timeSlice);

      // headers with no matching normalized field, or matching a non-output field, are skipped
      if (candidate == null || !candidate.isOutput()) {
        continue;
      }
      idealColumns.add(column);
    }

    // unbox the collected indexes into a primitive array
    final int[] indexes = new int[idealColumns.size()];
    int next = 0;
    for (final Integer column : idealColumns) {
      indexes[next++] = column;
    }
    return indexes;
  }
  /**
   * Encode one CSV row into a numeric array for machine learning.
   *
   * <p>Every normalized field of the analyst's script is initialized and then visited in order.
   * Fields whose action is {@code Ignore} are skipped, as are output fields when {@code skipOutput}
   * is set. For the remaining fields the column is looked up by field name, the cell text is
   * trimmed, and then:
   *
   * <ul>
   *   <li>a cell that is empty or equal to {@code "?"} is handed to the script's missing-value
   *       handler, whose returned values are copied into the result;
   *   <li>a {@code Normalize} field is parsed with the CSV's format and normalized;
   *   <li>a {@code PassThrough} field is parsed with the CSV's format and stored as-is;
   *   <li>any other action is encoded by the field itself, which may yield several values.
   * </ul>
   *
   * <p>No bounds check is made against {@code outputLength}; it must be large enough for all
   * values written.
   *
   * @param analyst The analyst to use.
   * @param headers The headers for the input data.
   * @param csv The CSV that holds the input data.
   * @param outputLength The length of the returned array.
   * @param skipOutput True if the output should be skipped.
   * @return The encoded data, or {@code null} when the missing-value handler returns {@code null}
   *     (the whole row is to be skipped).
   */
  public static final double[] extractFields(
      final EncogAnalyst analyst,
      final CSVHeaders headers,
      final ReadCSV csv,
      final int outputLength,
      final boolean skipOutput) {
    final double[] encoded = new double[outputLength];
    int next = 0;

    for (final AnalystField field : analyst.getScript().getNormalize().getNormalizedFields()) {
      field.init();

      final NormalizationAction action = field.getAction();
      if (action == NormalizationAction.Ignore || (field.isOutput() && skipOutput)) {
        continue;
      }

      final int column = headers.find(field.getName());
      final String raw = csv.get(column).trim();
      final boolean missing = raw.equals("?") || raw.length() == 0;

      if (missing) {
        final HandleMissingValues missingHandler =
            analyst.getScript().getNormalize().getMissingValues();
        final double[] substitute = missingHandler.handleMissing(analyst, field);

        // a null answer means the entire row should be skipped
        if (substitute == null) {
          return null;
        }

        // the substitute values stand in for the missing ones
        for (final double value : substitute) {
          encoded[next++] = value;
        }
        continue;
      }

      if (action == NormalizationAction.Normalize || action == NormalizationAction.PassThrough) {
        // single numeric value, normalized only when requested
        final double parsed = csv.getFormat().parse(raw);
        final double value =
            (action == NormalizationAction.Normalize) ? field.normalize(parsed) : parsed;
        encoded[next++] = value;
      } else {
        // every other action is encoded by the field and may span several columns
        final double[] columns = field.encode(raw);
        for (int i = 0; i < columns.length; i++) {
          encoded[next++] = columns[i];
        }
      }
    }

    return encoded;
  }
  /**
   * Write the header line.
   *
   * <p>Each normalized field contributes as many quoted column names as it needs columns. The
   * names are produced by {@code CSVHeaders.tagColumn} from the field name, the column number
   * within the field, the field's time slice, and whether the field spans more than one column.
   * The finished line is printed to the writer in a single call.
   *
   * @param tw The output stream.
   */
  private void writeHeaders(final PrintWriter tw) {
    final StringBuilder header = new StringBuilder();

    for (final AnalystField field : this.analyst.getScript().getNormalize().getNormalizedFields()) {
      final int columns = field.getColumnsNeeded();
      final boolean multiColumn = columns > 1;

      int column = 0;
      while (column < columns) {
        BasicFile.appendSeparator(header, getFormat());
        header
            .append('"')
            .append(CSVHeaders.tagColumn(field.getName(), column, field.getTimeSlice(), multiColumn))
            .append('"');
        column++;
      }
    }

    tw.println(header.toString());
  }
  /**
   * Analyze the file.
   *
   * <p>Records the input file, its format and header expectation, stores the analyst, and marks
   * this object as analyzed. The CSV headers are then read from the input file, every normalized
   * field of the analyst's script is initialized, and a time series helper is built from the
   * analyst and the headers.
   *
   * @param inputFilename The input file.
   * @param expectInputHeaders True, if input headers are present.
   * @param inputFormat The format.
   * @param theAnalyst The analyst to use.
   */
  public void analyze(
      final File inputFilename,
      final boolean expectInputHeaders,
      final CSVFormat inputFormat,
      final EncogAnalyst theAnalyst) {
    // remember how the input is to be read and who is analyzing it
    setInputFilename(inputFilename);
    setInputFormat(inputFormat);
    setExpectInputHeaders(expectInputHeaders);
    this.analyst = theAnalyst;
    setAnalyzed(true);

    // read the headers from the input file
    this.analystHeaders = new CSVHeaders(inputFilename, expectInputHeaders, inputFormat);

    // prepare every normalized field before use
    for (final AnalystField normalized :
        theAnalyst.getScript().getNormalize().getNormalizedFields()) {
      normalized.init();
    }

    this.series = new TimeSeriesUtil(theAnalyst, true, this.analystHeaders.getHeaders());
  }
  /**
   * Locate the header columns that carry input values.
   *
   * <p>Every header is resolved, by base name and time slice, to a normalized field of the
   * script; a column is selected when such a field exists and reports itself as an input.
   *
   * @param headerList The headers.
   * @return The indexes of the input fields, in header order.
   */
  private int[] determineInputFields(final CSVHeaders headerList) {
    final List<Integer> inputColumns = new ArrayList<Integer>();

    int column = 0;
    while (column < headerList.size()) {
      final String name = headerList.getBaseHeader(column);
      final int timeSlice = headerList.getSlice(column);
      final AnalystField candidate = getAnalyst().getScript().findNormalizedField(name, timeSlice);

      final boolean isInputColumn = candidate != null && candidate.isInput();
      if (isInputColumn) {
        inputColumns.add(column);
      }
      column++;
    }

    // unbox the collected indexes into a primitive array
    final int[] indexes = new int[inputColumns.size()];
    int slot = 0;
    for (final Integer index : inputColumns) {
      indexes[slot++] = index;
    }

    return indexes;
  }
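  // Example no. 6 / 0 -- apparently a separator left over from a code-listing page, not Java
  // source; the methods that follow seem to come from a different code sample.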
  /**
   * Save the normalization data.
   *
   * <p>Writes the NORMALIZE/CONFIG sub section, then a RANGE sub section holding one header line
   * followed by one line per normalized field: name, io direction, time slice, action keyword,
   * normalized high and normalized low.
   *
   * @param out The output file.
   * @throws AnalystError If a field has an action with no known keyword.
   */
  private void saveNormalize(final EncogWriteHelper out) {
    saveSubSection(out, "NORMALIZE", "CONFIG");

    // header line of the RANGE table
    out.addSubSection("RANGE");
    final String[] headings = {"name", "io", "timeSlice", "action", "high", "low"};
    for (final String heading : headings) {
      out.addColumn(heading);
    }
    out.writeLine();

    for (final AnalystField normalized : this.script.getNormalize().getNormalizedFields()) {
      out.addColumn(normalized.getName());
      out.addColumn(normalized.isInput() ? "input" : "output");
      out.addColumn(normalized.getTimeSlice());

      // translate the action into the keyword used in the file
      final String actionKeyword;
      switch (normalized.getAction()) {
        case Ignore:
          actionKeyword = "ignore";
          break;
        case Normalize:
          actionKeyword = "range";
          break;
        case PassThrough:
          actionKeyword = "pass";
          break;
        case OneOf:
          actionKeyword = "oneof";
          break;
        case Equilateral:
          actionKeyword = "equilateral";
          break;
        case SingleField:
          actionKeyword = "single";
          break;
        default:
          throw new AnalystError("Unknown action: " + normalized.getAction());
      }
      out.addColumn(actionKeyword);

      out.addColumn(normalized.getNormalizedHigh());
      out.addColumn(normalized.getNormalizedLow());
      out.writeLine();
    }
  }
  /**
   * Produce the report.
   *
   * <p>Runs {@code analyzeFile()} and then renders an HTML document titled "Encog Analyst Report"
   * made of five sections, in this order: General Statistics, Field Ranges, Normalization, Machine
   * Learning and Files. Each section is rendered by its own private helper.
   *
   * @return The report, as returned by the HTML report's {@code toString()}.
   */
  public String produceReport() {
    final HTMLReport report = new HTMLReport();

    analyzeFile();
    report.beginHTML();
    report.title("Encog Analyst Report");
    report.beginBody();

    appendGeneralStatisticsSection(report);
    appendFieldRangesSection(report);
    appendNormalizationSection(report);
    appendMachineLearningSection(report);
    appendFilesSection(report);

    report.endBody();
    report.endHTML();

    return report.toString();
  }

  /**
   * Append the "General Statistics" section: total and missing row counts.
   *
   * @param report The report being built.
   */
  private void appendGeneralStatisticsSection(final HTMLReport report) {
    report.h1("General Statistics");
    report.beginTable();
    report.tablePair("Total row count", Format.formatInteger(this.rowCount));
    report.tablePair("Missing row count", Format.formatInteger(this.missingCount));
    report.endTable();
  }

  /**
   * Append the "Field Ranges" section: one row of statistics per data field of the script,
   * followed by a nested class-member table for fields that have class members.
   *
   * @param report The report being built.
   */
  private void appendFieldRangesSection(final HTMLReport report) {
    report.h1("Field Ranges");
    report.beginTable();
    report.beginRow();
    report.header("Name");
    report.header("Class?");
    report.header("Complete?");
    report.header("Int?");
    report.header("Real?");
    report.header("Max");
    report.header("Min");
    report.header("Mean");
    report.header("Standard Deviation");
    report.endRow();

    for (final DataField df : this.analyst.getScript().getFields()) {
      report.beginRow();
      report.cell(df.getName());
      report.cell(Format.formatYesNo(df.isClass()));
      report.cell(Format.formatYesNo(df.isComplete()));
      report.cell(Format.formatYesNo(df.isInteger()));
      report.cell(Format.formatYesNo(df.isReal()));
      report.cell(Format.formatDouble(df.getMax(), FIVE_SPAN));
      report.cell(Format.formatDouble(df.getMin(), FIVE_SPAN));
      report.cell(Format.formatDouble(df.getMean(), FIVE_SPAN));
      report.cell(Format.formatDouble(df.getStandardDeviation(), FIVE_SPAN));
      report.endRow();

      if (df.getClassMembers().size() > 0) {
        appendClassMembersRow(report, df);
      }
    }

    report.endTable();
  }

  /**
   * Append a row holding a nested table of the class members (code, name, count) of one field.
   *
   * @param report The report being built.
   * @param df The data field whose class members are listed.
   */
  private void appendClassMembersRow(final HTMLReport report, final DataField df) {
    report.beginRow();
    // blank first cell, then the nested table in a cell opened with EIGHT_SPAN
    report.cell(" ");
    report.beginTableInCell(EIGHT_SPAN);
    report.beginRow();
    report.header("Code");
    report.header("Name");
    report.header("Count");
    report.endRow();
    for (final AnalystClassItem item : df.getClassMembers()) {
      report.beginRow();
      report.cell(item.getCode());
      report.cell(item.getName());
      report.cell(Format.formatInteger(item.getCount()));
      report.endRow();
    }
    report.endTableInCell();
    report.endRow();
  }

  /**
   * Append the "Normalization" section: name, action and normalized high/low of every normalized
   * field of the script.
   *
   * @param report The report being built.
   */
  private void appendNormalizationSection(final HTMLReport report) {
    report.h1("Normalization");
    report.beginTable();
    report.beginRow();
    report.header("Name");
    report.header("Action");
    report.header("High");
    report.header("Low");
    report.endRow();

    for (final AnalystField item : this.analyst.getScript().getNormalize().getNormalizedFields()) {
      report.beginRow();
      report.cell(item.getName());
      report.cell(item.getAction().toString());
      report.cell(Format.formatDouble(item.getNormalizedHigh(), FIVE_SPAN));
      report.cell(Format.formatDouble(item.getNormalizedLow(), FIVE_SPAN));
      report.endRow();
    }

    report.endTable();
  }

  /**
   * Append the "Machine Learning" section: the type, architecture and machine learning file
   * properties read from the script.
   *
   * @param report The report being built.
   */
  private void appendMachineLearningSection(final HTMLReport report) {
    report.h1("Machine Learning");
    report.beginTable();
    report.beginRow();
    report.header("Name");
    report.header("Value");
    report.endRow();

    final String type =
        this.analyst.getScript().getProperties().getPropertyString(ScriptProperties.ML_CONFIG_TYPE);
    final String architecture =
        this.analyst
            .getScript()
            .getProperties()
            .getPropertyString(ScriptProperties.ML_CONFIG_ARCHITECTURE);
    final String machineLearningFile =
        this.analyst
            .getScript()
            .getProperties()
            .getPropertyString(ScriptProperties.ML_CONFIG_MACHINE_LEARNING_FILE);

    report.tablePair("Type", type);
    report.tablePair("Architecture", architecture);
    report.tablePair("Machine Learning File", machineLearningFile);
    report.endTable();
  }

  /**
   * Append the "Files" section: every filename key of the script properties with its filename.
   *
   * @param report The report being built.
   */
  private void appendFilesSection(final HTMLReport report) {
    report.h1("Files");
    report.beginTable();
    report.beginRow();
    report.header("Name");
    report.header("Filename");
    report.endRow();
    for (final String key : this.analyst.getScript().getProperties().getFilenames()) {
      final String value = this.analyst.getScript().getProperties().getFilename(key);
      report.beginRow();
      report.cell(key);
      report.cell(value);
      report.endRow();
    }
    report.endTable();
  }