/**
 * Test classification through the analyst. Runs the full analyst task
 * ("task-full") against a simple CSV resource, then verifies the produced
 * output file and the analyzed field metadata.
 *
 * @throws Exception If any error occurs during the test.
 */
public void testClassification() throws Exception {
    File rawFile = TEMP_DIR.createFile("simple.csv");
    File egaFile = TEMP_DIR.createFile("simple.ega");
    File outputFile = TEMP_DIR.createFile("simple_output.csv");

    FileUtil.copyResource("org/encog/data/simple.csv", rawFile);
    FileUtil.copyResource("org/encog/data/simple-r.ega", egaFile);

    EncogAnalyst analyst = new EncogAnalyst();
    analyst.addAnalystListener(new ConsoleAnalystListener());
    analyst.load(egaFile);
    analyst.executeTask("task-full");

    ReadCSV csv = new ReadCSV(outputFile.toString(), true, CSVFormat.ENGLISH);
    try {
        // each output row: actual value (col 2) should be near predicted (col 4)
        while (csv.next()) {
            double diff = Math.abs(csv.getDouble(2) - csv.getDouble(4));
            Assert.assertTrue(diff < 1.5);
        }
    } finally {
        // original closed the reader only after all asserts passed, leaking the
        // handle on any assertion failure; always close it
        csv.close();
    }

    Assert.assertEquals(4, analyst.getScript().getFields().length);
    Assert.assertEquals(3,
            analyst.getScript().getFields()[3].getClassMembers().size());
}
/**
 * Load a CSV file into a memory dataset.
 *
 * @param format The CSV format to use.
 * @param filename The filename to load.
 * @param headers True if there is a header line.
 * @param inputSize The input size. Input always comes first in a file.
 * @param idealSize The ideal size, 0 for unsupervised.
 * @return A NeuralDataSet that holds the contents of the CSV file.
 */
public static MLDataSet loadCSVTOMemory(CSVFormat format, String filename,
        boolean headers, int inputSize, int idealSize) {
    MLDataSet result = new BasicMLDataSet();
    ReadCSV csv = new ReadCSV(filename, headers, format);
    try {
        while (csv.next()) {
            int index = 0;

            // input columns always come first in the row
            MLData input = new BasicMLData(inputSize);
            for (int i = 0; i < inputSize; i++) {
                input.setData(i, csv.getDouble(index++));
            }

            // ideal (expected output) columns follow, if supervised
            MLData ideal = null;
            if (idealSize > 0) {
                ideal = new BasicMLData(idealSize);
                for (int i = 0; i < idealSize; i++) {
                    ideal.setData(i, csv.getDouble(index++));
                }
            }

            result.add(new BasicMLDataPair(input, ideal));
        }
    } finally {
        // original never closed the reader; always release the file handle
        csv.close();
    }
    return result;
}
/**
 * Normalize the input file. Write to the specified file.
 *
 * <p>Requires {@code analyze} to have been run first; throws otherwise.
 * Rows for which no output can be produced (e.g. skipped missing-value rows,
 * or rows buffered by time-series processing) are not written.
 *
 * @param file The file to write to.
 */
public void normalize(final File file) {
    // normalization needs the analyst produced by a prior analyze step
    if (this.analyst == null) {
        throw new EncogError("Can't normalize yet, file has not been analyzed.");
    }

    ReadCSV csv = null;
    PrintWriter tw = null;

    try {
        csv = new ReadCSV(getInputFilename().toString(),
                isExpectInputHeaders(), getFormat());
        tw = new PrintWriter(new FileWriter(file));

        // write headers, if needed
        if (isProduceOutputHeaders()) {
            writeHeaders(tw);
        }

        resetStatus();
        final int outputLength = this.analyst.determineTotalColumns();

        // write file contents
        while (csv.next() && !shouldStop()) {
            updateStatus(false);

            // extractFields returns null when the entire row should be skipped
            double[] output = AnalystNormalizeCSV.extractFields(
                    this.analyst, this.analystHeaders, csv, outputLength, false);

            // time-series processing may buffer/transform the row
            if (this.series.getTotalDepth() > 1) {
                output = this.series.process(output);
            }

            // only emit a line when there is an actual row to write
            if (output != null) {
                final StringBuilder line = new StringBuilder();
                NumberList.toList(getFormat(), line, output);
                tw.println(line);
            }
        }
    } catch (final IOException e) {
        throw new QuantError(e);
    } finally {
        reportDone(false);
        // close both resources; log (don't rethrow) close failures so the
        // original exception, if any, is not masked
        if (csv != null) {
            try {
                csv.close();
            } catch (final Exception ex) {
                EncogLogging.log(ex);
            }
        }

        if (tw != null) {
            try {
                tw.close();
            } catch (final Exception ex) {
                EncogLogging.log(ex);
            }
        }
    }
}
public static void convertCSV2Binary( File csvFile, CSVFormat format, File binFile, int[] input, int[] ideal, boolean headers) { binFile.delete(); ReadCSV csv = new ReadCSV(csvFile.toString(), headers, format); BufferedMLDataSet buffer = new BufferedMLDataSet(binFile); buffer.beginLoad(input.length, ideal.length); while (csv.next()) { BasicMLData inputData = new BasicMLData(input.length); BasicMLData idealData = new BasicMLData(ideal.length); // handle input data for (int i = 0; i < input.length; i++) { inputData.setData(i, csv.getDouble(input[i])); } // handle input data for (int i = 0; i < ideal.length; i++) { idealData.setData(i, csv.getDouble(ideal[i])); } // add to dataset buffer.add(inputData, idealData); } buffer.endLoad(); }
/**
 * Extract fields from a file into a numeric array for machine learning.
 *
 * <p>Fields whose action is {@code Ignore} are skipped, as are output fields
 * when {@code skipOutput} is true. Missing values ("?" or blank) are filled
 * by the script's missing-value handler; if that handler returns null, the
 * whole row is skipped and this method returns null.
 *
 * @param analyst The analyst to use.
 * @param headers The headers for the input data.
 * @param csv The CSV that holds the input data.
 * @param outputLength The length of the returned array.
 * @param skipOutput True if the output should be skipped.
 * @return The encoded data, or null if this row should be skipped entirely.
 */
public static final double[] extractFields(final EncogAnalyst analyst,
        final CSVHeaders headers, final ReadCSV csv,
        final int outputLength, final boolean skipOutput) {
    final double[] output = new double[outputLength];
    // running write position; each field may emit one value or several
    // (e.g. one-hot class encodings), so this is not the field index
    int outputIndex = 0;
    for (final AnalystField stat : analyst.getScript().getNormalize()
            .getNormalizedFields()) {
        stat.init();
        if (stat.getAction() == NormalizationAction.Ignore) {
            continue;
        }

        if (stat.isOutput() && skipOutput) {
            continue;
        }

        // locate this field's column by header name, not by position
        int index = headers.find(stat.getName());
        final String str = csv.get(index).trim();

        // is this an unknown value?
        if (str.equals("?") || str.length() == 0) {
            HandleMissingValues handler = analyst.getScript().getNormalize()
                    .getMissingValues();
            double[] d = handler.handleMissing(analyst, stat);

            // should we skip the entire row
            if (d == null) {
                return null;
            }

            // copy the returned values in place of the missing values
            for (int i = 0; i < d.length; i++) {
                output[outputIndex++] = d[i];
            }
        } else {
            // known value
            if (stat.getAction() == NormalizationAction.Normalize) {
                // numeric field: parse and scale into the configured range
                double d = csv.getFormat().parse(str);
                d = stat.normalize(d);
                output[outputIndex++] = d;
            } else if (stat.getAction() == NormalizationAction.PassThrough) {
                // numeric field copied unmodified
                double d = csv.getFormat().parse(str);
                output[outputIndex++] = d;
            } else {
                // class field: encode to one or more numeric values
                final double[] d = stat.encode(str);
                for (final double element : d) {
                    output[outputIndex++] = element;
                }
            }
        }
    }

    return output;
}
private void analyzeFile() { ScriptProperties prop = this.analyst.getScript().getProperties(); // get filenames, headers & format String sourceID = prop.getPropertyString(ScriptProperties.HEADER_DATASOURCE_RAW_FILE); File sourceFile = this.analyst.getScript().resolveFilename(sourceID); CSVFormat inputFormat = this.analyst.getScript().determineFormat(); boolean headers = this.analyst.getScript().expectInputHeaders(sourceID); // read the file this.rowCount = 0; this.missingCount = 0; ReadCSV csv = new ReadCSV(sourceFile.toString(), headers, inputFormat); while (csv.next()) { rowCount++; if (csv.hasMissing()) missingCount++; } csv.close(); }
/**
 * Analyze the data. This counts the records and prepares the data to be
 * processed.
 *
 * @param theAnalyst The analyst to use.
 * @param inputFile The input file to analyze.
 * @param headers True, if the input file has headers.
 * @param format The format of the input file.
 */
public void analyze(final EncogAnalyst theAnalyst, final File inputFile,
        final boolean headers, final CSVFormat format) {
    this.setInputFilename(inputFile);
    setExpectInputHeaders(headers);
    setInputFormat(format);
    setAnalyzed(true);
    this.analyst = theAnalyst;
    this.data = new BasicMLDataSet();

    resetStatus();
    int recordCount = 0;

    final int outputLength = this.analyst.determineTotalColumns();
    final ReadCSV csv = new ReadCSV(this.getInputFilename().toString(),
            this.isExpectInputHeaders(), this.getFormat());
    try {
        readHeaders(csv);
        this.analystHeaders = new CSVHeaders(this.getInputHeadings());

        while (csv.next() && !shouldStop()) {
            updateStatus(true);
            final double[] inputArray = AnalystNormalizeCSV.extractFields(
                    analyst, this.analystHeaders, csv, outputLength, true);
            // extractFields returns null when the row should be skipped
            // (missing-value handling); originally this fed null straight
            // into BasicMLData, which would fail on such a row
            if (inputArray != null) {
                final MLData input = new BasicMLData(inputArray);
                this.data.add(new BasicMLDataPair(input));
                recordCount++;
            }
        }

        setRecordCount(recordCount);
        this.setColumnCount(csv.getColumnCount());
        // NOTE: the original called readHeaders(csv) a second time here,
        // after the whole file had been read; the headers were already
        // captured before the loop, so the duplicate call was dropped.
    } finally {
        // always release the file handle, even if reading fails
        csv.close();
    }
    reportDone(true);
}
/**
 * Construct the object, reading the header names from the specified file.
 *
 * @param filename The filename to read headers from.
 * @param headers True if the file's first row holds header names; false to
 *     synthesize "field:n" names from the column count instead.
 * @param format The CSV format.
 */
public CSVHeaders(final File filename, final boolean headers,
        final CSVFormat format) {
    ReadCSV csv = null;
    try {
        csv = new ReadCSV(filename.toString(), headers, format);
        // a first row must exist for either the names or the column count
        if (csv.next()) {
            if (headers) {
                // use the actual header names from the file
                for (final String str : csv.getColumnNames()) {
                    this.headerList.add(str);
                }
            } else {
                // no header row: generate field:1, field:2, ...
                for (int i = 0; i < csv.getColumnCount(); i++) {
                    this.headerList.add("field:" + (i + 1));
                }
            }
        }

        init();
    } finally {
        if (csv != null) {
            csv.close();
        }
    }
}