/**
 * Runs the "task-full" task of the analyst script loaded from {@code simple-r.ega} against
 * {@code simple.csv} and checks the produced output file and the resulting script fields.
 *
 * <p>For every row of the output file, columns 2 and 4 must differ by less than 1.5. The script
 * must end up with four fields, the last of which has three class members.
 *
 * @throws Exception if copying the resources or running the analyst fails.
 */
public void testClassification() throws Exception {
    File rawFile = TEMP_DIR.createFile("simple.csv");
    File egaFile = TEMP_DIR.createFile("simple.ega");
    File outputFile = TEMP_DIR.createFile("simple_output.csv");

    FileUtil.copyResource("org/encog/data/simple.csv", rawFile);
    FileUtil.copyResource("org/encog/data/simple-r.ega", egaFile);

    EncogAnalyst analyst = new EncogAnalyst();
    analyst.addAnalystListener(new ConsoleAnalystListener());
    analyst.load(egaFile);

    analyst.executeTask("task-full");

    ReadCSV csv = new ReadCSV(outputFile.toString(), true, CSVFormat.ENGLISH);
    try {
      while (csv.next()) {
        // NOTE(review): columns 2 and 4 are presumably the expected and the computed value;
        // confirm against the layout written by the script.
        double diff = Math.abs(csv.getDouble(2) - csv.getDouble(4));
        Assert.assertTrue(diff < 1.5);
      }
    } finally {
      // Close the reader even when an assertion fails, so the output file is not left open.
      csv.close();
    }

    Assert.assertEquals(4, analyst.getScript().getFields().length);
    Assert.assertEquals(3, analyst.getScript().getFields()[3].getClassMembers().size());
  }
  /**
   * Normalizes the input file row by row and writes the result to the given file.
   *
   * <p>Headers are written first when output headers are enabled. Each input row is converted
   * to a numeric array; when the series depth is greater than one the array is additionally
   * passed through {@code this.series.process}. A row whose resulting array is {@code null}
   * produces no output line. Processing ends early once {@code shouldStop()} returns true.
   *
   * @param file The file to write to.
   * @throws EncogError if no analyst is set, i.e. the file has not been analyzed yet.
   * @throws QuantError if an {@link IOException} occurs while normalizing.
   */
  public void normalize(final File file) {
    if (this.analyst == null) {
      throw new EncogError("Can't normalize yet, file has not been analyzed.");
    }

    ReadCSV reader = null;
    PrintWriter writer = null;

    try {
      reader = new ReadCSV(getInputFilename().toString(), isExpectInputHeaders(), getFormat());
      writer = new PrintWriter(new FileWriter(file));

      // Optional header line comes before any data.
      if (isProduceOutputHeaders()) {
        writeHeaders(writer);
      }

      resetStatus();
      final int columnTotal = this.analyst.determineTotalColumns();

      // One pass over the input; each usable row becomes one output line.
      while (reader.next() && !shouldStop()) {
        updateStatus(false);

        double[] row =
            AnalystNormalizeCSV.extractFields(
                this.analyst, this.analystHeaders, reader, columnTotal, false);

        if (this.series.getTotalDepth() > 1) {
          row = this.series.process(row);
        }

        // Nothing to write for this row.
        if (row == null) {
          continue;
        }

        final StringBuilder text = new StringBuilder();
        NumberList.toList(getFormat(), text, row);
        writer.println(text);
      }
    } catch (final IOException e) {
      throw new QuantError(e);
    } finally {
      reportDone(false);

      // Close failures are logged rather than rethrown.
      if (reader != null) {
        try {
          reader.close();
        } catch (final Exception ex) {
          EncogLogging.log(ex);
        }
      }

      if (writer != null) {
        try {
          writer.close();
        } catch (final Exception ex) {
          EncogLogging.log(ex);
        }
      }
    }
  }
  /**
   * Scans the raw data source file named by the script properties and records how many rows it
   * contains and for how many of them the reader reports missing values.
   *
   * <p>The results are stored in {@code rowCount} and {@code missingCount}; both are reset to
   * zero before the scan.
   */
  private void analyzeFile() {
    ScriptProperties prop = this.analyst.getScript().getProperties();

    // get filenames, headers & format
    String sourceID = prop.getPropertyString(ScriptProperties.HEADER_DATASOURCE_RAW_FILE);

    File sourceFile = this.analyst.getScript().resolveFilename(sourceID);
    CSVFormat inputFormat = this.analyst.getScript().determineFormat();
    boolean headers = this.analyst.getScript().expectInputHeaders(sourceID);

    // read the file
    this.rowCount = 0;
    this.missingCount = 0;

    ReadCSV csv = new ReadCSV(sourceFile.toString(), headers, inputFormat);
    try {
      while (csv.next()) {
        this.rowCount++;
        if (csv.hasMissing()) {
          this.missingCount++;
        }
      }
    } finally {
      // Release the file even if reading a row fails part-way through.
      csv.close();
    }
  }
  /**
   * Analyze the data. This counts the records and prepares the data to be processed.
   *
   * <p>Every row of the input file is converted to a numeric array and stored in a fresh
   * {@code BasicMLDataSet}. The record count and column count are recorded afterwards. Reading
   * stops early once {@code shouldStop()} returns true.
   *
   * @param theAnalyst The analyst to use.
   * @param inputFile The input file to analyze.
   * @param headers True, if the input file has headers.
   * @param format The format of the input file.
   */
  public void analyze(
      final EncogAnalyst theAnalyst,
      final File inputFile,
      final boolean headers,
      final CSVFormat format) {
    this.setInputFilename(inputFile);
    setExpectInputHeaders(headers);
    setInputFormat(format);

    setAnalyzed(true);
    this.analyst = theAnalyst;
    this.data = new BasicMLDataSet();
    resetStatus();
    int recordCount = 0;

    final int outputLength = this.analyst.determineTotalColumns();
    final ReadCSV csv =
        new ReadCSV(
            this.getInputFilename().toString(), this.isExpectInputHeaders(), this.getFormat());
    try {
      readHeaders(csv);

      this.analystHeaders = new CSVHeaders(this.getInputHeadings());

      while (csv.next() && !shouldStop()) {
        updateStatus(true);
        final double[] inputArray =
            AnalystNormalizeCSV.extractFields(
                analyst, this.analystHeaders, csv, outputLength, true);
        final MLData input = new BasicMLData(inputArray);
        this.data.add(new BasicMLDataPair(input));

        recordCount++;
      }
      setRecordCount(recordCount);
      this.setColumnCount(csv.getColumnCount());

      // NOTE(review): headers are read a second time after the rows have been consumed;
      // presumably the column information is only complete at this point. Confirm before
      // removing either call.
      readHeaders(csv);
    } finally {
      // Release the input file even if extracting a row fails.
      csv.close();
    }
    reportDone(true);
  }
Beispiel #5
0
  /**
   * Construct the object.
   *
   * @param filename The filename.
   * @param headers False if headers are not extended.
   * @param format The CSV format.
   */
  public CSVHeaders(final File filename, final boolean headers, final CSVFormat format) {
    ReadCSV csv = null;
    try {
      csv = new ReadCSV(filename.toString(), headers, format);
      if (csv.next()) {
        if (headers) {
          for (final String str : csv.getColumnNames()) {
            this.headerList.add(str);
          }
        } else {
          for (int i = 0; i < csv.getColumnCount(); i++) {
            this.headerList.add("field:" + (i + 1));
          }
        }
      }

      init();

    } finally {
      if (csv != null) {
        csv.close();
      }
    }
  }