/**
 * Runs the "task-full" analyst task against the bundled simple classification data and checks
 * the produced output file and the analysed field metadata.
 *
 * @throws Exception if any step of the test fails unexpectedly.
 */
public void testClassification() throws Exception {
    File rawFile = TEMP_DIR.createFile("simple.csv");
    File egaFile = TEMP_DIR.createFile("simple.ega");
    File outputFile = TEMP_DIR.createFile("simple_output.csv");

    FileUtil.copyResource("org/encog/data/simple.csv", rawFile);
    FileUtil.copyResource("org/encog/data/simple-r.ega", egaFile);

    EncogAnalyst analyst = new EncogAnalyst();
    analyst.addAnalystListener(new ConsoleAnalystListener());
    analyst.load(egaFile);

    analyst.executeTask("task-full");

    // Columns 2 and 4 of every output row must agree to within 1.5.
    // The reader is closed in a finally block so that a failing assertion does not leak it.
    ReadCSV csv = new ReadCSV(outputFile.toString(), true, CSVFormat.ENGLISH);
    try {
      while (csv.next()) {
        double diff = Math.abs(csv.getDouble(2) - csv.getDouble(4));
        Assert.assertTrue(diff < 1.5);
      }
    } finally {
      csv.close();
    }

    Assert.assertEquals(4, analyst.getScript().getFields().length);
    Assert.assertEquals(3, analyst.getScript().getFields()[3].getClassMembers().size());
  }
  /**
   * Load a CSV file into a memory dataset.
   *
   * <p>Each row is read left to right: the first {@code inputSize} columns become the input
   * vector and, when {@code idealSize > 0}, the following {@code idealSize} columns become the
   * ideal vector. The CSV reader is always closed before this method returns or throws.
   *
   * @param format The CSV format to use.
   * @param filename The filename to load.
   * @param headers True if there is a header line.
   * @param inputSize The input size. Input always comes first in a file.
   * @param idealSize The ideal size, 0 for unsupervised.
   * @return An MLDataSet that holds the contents of the CSV file.
   */
  public static MLDataSet loadCSVTOMemory(
      CSVFormat format, String filename, boolean headers, int inputSize, int idealSize) {
    MLDataSet result = new BasicMLDataSet();
    ReadCSV csv = new ReadCSV(filename, headers, format);
    try {
      while (csv.next()) {
        int index = 0;

        MLData input = new BasicMLData(inputSize);
        for (int i = 0; i < inputSize; i++) {
          input.setData(i, csv.getDouble(index++));
        }

        // The ideal part stays null for unsupervised data (idealSize == 0).
        MLData ideal = null;
        if (idealSize > 0) {
          ideal = new BasicMLData(idealSize);
          for (int i = 0; i < idealSize; i++) {
            ideal.setData(i, csv.getDouble(index++));
          }
        }

        result.add(new BasicMLDataPair(input, ideal));
      }
    } finally {
      // Release the underlying file even if a row fails to parse.
      csv.close();
    }

    return result;
  }
  /**
   * Normalize the input file. Write to the specified file.
   *
   * <p>Every input row is converted with {@link AnalystNormalizeCSV#extractFields}; rows for
   * which that call returns {@code null} are skipped. When the time-series depth is greater
   * than one, the converted row is additionally passed through the series processor, which may
   * itself yield {@code null} (no line is written in that case).
   *
   * @param file The file to write to.
   * @throws EncogError if the input file has not been analyzed yet.
   * @throws QuantError if the output file cannot be opened.
   */
  public void normalize(final File file) {
    if (this.analyst == null) {
      throw new EncogError("Can't normalize yet, file has not been analyzed.");
    }

    ReadCSV csv = null;
    PrintWriter tw = null;

    try {
      csv = new ReadCSV(getInputFilename().toString(), isExpectInputHeaders(), getFormat());

      tw = new PrintWriter(new FileWriter(file));

      // write headers, if needed
      if (isProduceOutputHeaders()) {
        writeHeaders(tw);
      }

      resetStatus();
      final int outputLength = this.analyst.determineTotalColumns();

      // write file contents
      while (csv.next() && !shouldStop()) {
        updateStatus(false);

        double[] output =
            AnalystNormalizeCSV.extractFields(
                this.analyst, this.analystHeaders, csv, outputLength, false);

        // extractFields returns null when the row must be skipped (missing values);
        // do not hand that null to the time-series processor.
        if (output != null && this.series.getTotalDepth() > 1) {
          output = this.series.process(output);
        }

        if (output != null) {
          final StringBuilder line = new StringBuilder();
          NumberList.toList(getFormat(), line, output);
          tw.println(line);
        }
      }
    } catch (final IOException e) {
      throw new QuantError(e);
    } finally {
      reportDone(false);
      // Best-effort cleanup: a failure to close is logged, never rethrown.
      if (csv != null) {
        try {
          csv.close();
        } catch (final Exception ex) {
          EncogLogging.log(ex);
        }
      }

      if (tw != null) {
        try {
          tw.close();
        } catch (final Exception ex) {
          EncogLogging.log(ex);
        }
      }
    }
  }
예제 #4
0
  /**
   * Convert a CSV file into a binary training file.
   *
   * <p>Any existing {@code binFile} is deleted first. For every CSV row, the columns listed in
   * {@code input} are copied into the input vector and the columns listed in {@code ideal} into
   * the ideal vector, and the pair is appended to a {@link BufferedMLDataSet} backed by
   * {@code binFile}. The CSV reader is closed whether or not the conversion succeeds.
   *
   * @param csvFile The CSV file to read.
   * @param format The CSV format to use.
   * @param binFile The binary file to (re)create.
   * @param input Column indexes, within the CSV row, of the input values.
   * @param ideal Column indexes, within the CSV row, of the ideal values.
   * @param headers True if the CSV file has a header line.
   */
  public static void convertCSV2Binary(
      File csvFile, CSVFormat format, File binFile, int[] input, int[] ideal, boolean headers) {

    binFile.delete();
    ReadCSV csv = new ReadCSV(csvFile.toString(), headers, format);

    try {
      BufferedMLDataSet buffer = new BufferedMLDataSet(binFile);
      buffer.beginLoad(input.length, ideal.length);
      while (csv.next()) {
        BasicMLData inputData = new BasicMLData(input.length);
        BasicMLData idealData = new BasicMLData(ideal.length);

        // handle input data
        for (int i = 0; i < input.length; i++) {
          inputData.setData(i, csv.getDouble(input[i]));
        }

        // handle ideal data
        for (int i = 0; i < ideal.length; i++) {
          idealData.setData(i, csv.getDouble(ideal[i]));
        }

        // add to dataset
        buffer.add(inputData, idealData);
      }
      buffer.endLoad();
    } finally {
      // Release the CSV file even if a row fails to parse or the buffer rejects it.
      csv.close();
    }
  }
  /**
   * Convert the current CSV row into the numeric array used for machine learning.
   *
   * <p>The normalized fields of the analyst script are visited in order. Fields whose action is
   * {@code Ignore}, and output fields when {@code skipOutput} is set, contribute nothing. For
   * the remaining fields the matching column is looked up by name, trimmed, and then either
   * normalized, passed through unchanged, or encoded into one or more values. A value of
   * {@code "?"} or an empty string is treated as missing and delegated to the script's
   * missing-value handler.
   *
   * @param analyst The analyst to use.
   * @param headers The headers for the input data.
   * @param csv The CSV that holds the input data.
   * @param outputLength The length of the returned array.
   * @param skipOutput True if the output should be skipped.
   * @return The encoded data, or {@code null} if the missing-value handler asks for the whole
   *     row to be skipped.
   */
  public static final double[] extractFields(
      final EncogAnalyst analyst,
      final CSVHeaders headers,
      final ReadCSV csv,
      final int outputLength,
      final boolean skipOutput) {
    final double[] encoded = new double[outputLength];
    int cursor = 0;

    for (final AnalystField field : analyst.getScript().getNormalize().getNormalizedFields()) {
      field.init();

      final NormalizationAction action = field.getAction();
      if (action == NormalizationAction.Ignore || (field.isOutput() && skipOutput)) {
        continue;
      }

      final int column = headers.find(field.getName());
      final String raw = csv.get(column).trim();
      final boolean missing = raw.equals("?") || raw.length() == 0;

      if (missing) {
        final double[] substitute =
            analyst.getScript().getNormalize().getMissingValues().handleMissing(analyst, field);

        // a null substitute means: drop the entire row
        if (substitute == null) {
          return null;
        }

        // the substitute values take the place of the missing ones
        for (final double value : substitute) {
          encoded[cursor++] = value;
        }
        continue;
      }

      // known value
      if (action == NormalizationAction.Normalize) {
        final double parsed = csv.getFormat().parse(raw);
        final double normalized = field.normalize(parsed);
        encoded[cursor++] = normalized;
      } else if (action == NormalizationAction.PassThrough) {
        final double parsed = csv.getFormat().parse(raw);
        encoded[cursor++] = parsed;
      } else {
        final double[] expanded = field.encode(raw);
        for (int i = 0; i < expanded.length; i++) {
          encoded[cursor++] = expanded[i];
        }
      }
    }

    return encoded;
  }
  /**
   * Scan the raw data source named in the script properties and record how many rows it
   * contains ({@code rowCount}) and how many of those rows have a missing value
   * ({@code missingCount}). The CSV reader is closed even if reading fails part-way through.
   */
  private void analyzeFile() {
    ScriptProperties prop = this.analyst.getScript().getProperties();

    // get filenames, headers & format
    String sourceID = prop.getPropertyString(ScriptProperties.HEADER_DATASOURCE_RAW_FILE);

    File sourceFile = this.analyst.getScript().resolveFilename(sourceID);
    CSVFormat inputFormat = this.analyst.getScript().determineFormat();
    boolean headers = this.analyst.getScript().expectInputHeaders(sourceID);

    // read the file
    this.rowCount = 0;
    this.missingCount = 0;

    ReadCSV csv = new ReadCSV(sourceFile.toString(), headers, inputFormat);
    try {
      while (csv.next()) {
        this.rowCount++;
        if (csv.hasMissing()) {
          this.missingCount++;
        }
      }
    } finally {
      csv.close();
    }
  }
  /**
   * Analyze the data. This counts the records and prepares the data to be processed.
   *
   * <p>Every row of the input file is converted with
   * {@link AnalystNormalizeCSV#extractFields} (output fields skipped) and stored in the
   * in-memory dataset. Rows for which that call returns {@code null} (the missing-value
   * handler asked for the row to be dropped) are not stored and not counted. The CSV reader is
   * closed even if the analysis fails part-way through.
   *
   * @param theAnalyst The analyst to use.
   * @param inputFile The input file to analyze.
   * @param headers True, if the input file has headers.
   * @param format The format of the input file.
   */
  public void analyze(
      final EncogAnalyst theAnalyst,
      final File inputFile,
      final boolean headers,
      final CSVFormat format) {
    this.setInputFilename(inputFile);
    setExpectInputHeaders(headers);
    setInputFormat(format);

    setAnalyzed(true);
    this.analyst = theAnalyst;
    this.data = new BasicMLDataSet();
    resetStatus();
    int recordCount = 0;

    final int outputLength = this.analyst.determineTotalColumns();
    final ReadCSV csv =
        new ReadCSV(
            this.getInputFilename().toString(), this.isExpectInputHeaders(), this.getFormat());
    try {
      readHeaders(csv);

      this.analystHeaders = new CSVHeaders(this.getInputHeadings());

      while (csv.next() && !shouldStop()) {
        updateStatus(true);
        final double[] inputArray =
            AnalystNormalizeCSV.extractFields(
                analyst, this.analystHeaders, csv, outputLength, true);

        // extractFields signals "skip this row" with null; there is nothing to store.
        if (inputArray == null) {
          continue;
        }

        final MLData input = new BasicMLData(inputArray);
        this.data.add(new BasicMLDataPair(input));

        recordCount++;
      }
      setRecordCount(recordCount);
      this.setColumnCount(csv.getColumnCount());

      // NOTE(review): headers are read a second time after the data pass, as in the
      // original code; the reason is not visible from here.
      readHeaders(csv);
    } finally {
      csv.close();
    }
    reportDone(true);
  }
예제 #8
0
  /**
   * Construct the object.
   *
   * @param filename The filename.
   * @param headers False if headers are not extended.
   * @param format The CSV format.
   */
  public CSVHeaders(final File filename, final boolean headers, final CSVFormat format) {
    ReadCSV csv = null;
    try {
      csv = new ReadCSV(filename.toString(), headers, format);
      if (csv.next()) {
        if (headers) {
          for (final String str : csv.getColumnNames()) {
            this.headerList.add(str);
          }
        } else {
          for (int i = 0; i < csv.getColumnCount(); i++) {
            this.headerList.add("field:" + (i + 1));
          }
        }
      }

      init();

    } finally {
      if (csv != null) {
        csv.close();
      }
    }
  }