/** * Determine the ideal fields. * * @param headerList The headers. * @return The indexes of the ideal fields. */ private int[] determineIdealFields(final CSVHeaders headerList) { int[] result; final String type = getProp().getPropertyString(ScriptProperties.ML_CONFIG_TYPE); // is it non-supervised? if (type.equals(MLMethodFactory.TYPE_SOM)) { result = new int[0]; return result; } final List<Integer> fields = new ArrayList<Integer>(); for (int currentIndex = 0; currentIndex < headerList.size(); currentIndex++) { final String baseName = headerList.getBaseHeader(currentIndex); final int slice = headerList.getSlice(currentIndex); final AnalystField field = getAnalyst().getScript().findNormalizedField(baseName, slice); if (field != null && field.isOutput()) { fields.add(currentIndex); } } // allocate result array result = new int[fields.size()]; for (int i = 0; i < result.length; i++) { result[i] = fields.get(i); } return result; }
/** * Extract fields from a file into a numeric array for machine learning. * * @param analyst The analyst to use. * @param headers The headers for the input data. * @param csv The CSV that holds the input data. * @param outputLength The length of the returned array. * @param skipOutput True if the output should be skipped. * @return The encoded data. */ public static final double[] extractFields( final EncogAnalyst analyst, final CSVHeaders headers, final ReadCSV csv, final int outputLength, final boolean skipOutput) { final double[] output = new double[outputLength]; int outputIndex = 0; for (final AnalystField stat : analyst.getScript().getNormalize().getNormalizedFields()) { stat.init(); if (stat.getAction() == NormalizationAction.Ignore) { continue; } if (stat.isOutput() && skipOutput) { continue; } int index = headers.find(stat.getName()); final String str = csv.get(index).trim(); // is this an unknown value? if (str.equals("?") || str.length() == 0) { HandleMissingValues handler = analyst.getScript().getNormalize().getMissingValues(); double[] d = handler.handleMissing(analyst, stat); // should we skip the entire row if (d == null) { return null; } // copy the returned values in place of the missing values for (int i = 0; i < d.length; i++) { output[outputIndex++] = d[i]; } } else { // known value if (stat.getAction() == NormalizationAction.Normalize) { double d = csv.getFormat().parse(str); d = stat.normalize(d); output[outputIndex++] = d; } else if (stat.getAction() == NormalizationAction.PassThrough) { double d = csv.getFormat().parse(str); output[outputIndex++] = d; } else { final double[] d = stat.encode(str); for (final double element : d) { output[outputIndex++] = element; } } } } return output; }
/**
 * Emit the header row for the normalized output.
 *
 * <p>Each normalized field contributes {@code getColumnsNeeded()} quoted
 * column names, built by {@code CSVHeaders.tagColumn} from the field name, the
 * column position, the field's time slice and whether the field needs more
 * than one column. The finished row is printed as a single line.
 *
 * @param tw The output stream.
 */
private void writeHeaders(final PrintWriter tw) {
    final StringBuilder headerRow = new StringBuilder();

    for (final AnalystField field : this.analyst.getScript().getNormalize().getNormalizedFields()) {
        final int columnCount = field.getColumnsNeeded();
        int column = 0;
        while (column < columnCount) {
            BasicFile.appendSeparator(headerRow, getFormat());
            headerRow
                    .append('"')
                    .append(CSVHeaders.tagColumn(
                            field.getName(), column, field.getTimeSlice(), columnCount > 1))
                    .append('"');
            column++;
        }
    }

    tw.println(headerRow.toString());
}
/**
 * Prepare this object to process the given input file.
 *
 * <p>Records the input settings and the analyst, marks the file as analyzed,
 * loads the CSV headers, initialises every normalized field of the analyst
 * script and finally creates the time-series helper from those headers.
 *
 * @param inputFilename The input file.
 * @param expectInputHeaders True, if input headers are present.
 * @param inputFormat The format.
 * @param theAnalyst The analyst to use.
 */
public void analyze(
        final File inputFilename,
        final boolean expectInputHeaders,
        final CSVFormat inputFormat,
        final EncogAnalyst theAnalyst) {
    // remember how the input is to be read
    setInputFilename(inputFilename);
    setInputFormat(inputFormat);
    setExpectInputHeaders(expectInputHeaders);

    this.analyst = theAnalyst;
    setAnalyzed(true);

    this.analystHeaders = new CSVHeaders(inputFilename, expectInputHeaders, inputFormat);

    // bring every normalized field into its initial state
    for (final AnalystField normalizedField :
            this.analyst.getScript().getNormalize().getNormalizedFields()) {
        normalizedField.init();
    }

    this.series = new TimeSeriesUtil(this.analyst, true, this.analystHeaders.getHeaders());
}
/** * Determine the input fields. * * @param headerList The headers. * @return The indexes of the input fields. */ private int[] determineInputFields(final CSVHeaders headerList) { final List<Integer> fields = new ArrayList<Integer>(); for (int currentIndex = 0; currentIndex < headerList.size(); currentIndex++) { final String baseName = headerList.getBaseHeader(currentIndex); final int slice = headerList.getSlice(currentIndex); final AnalystField field = getAnalyst().getScript().findNormalizedField(baseName, slice); if (field != null && field.isInput()) { fields.add(currentIndex); } } // allocate result array final int[] result = new int[fields.size()]; for (int i = 0; i < result.length; i++) { result[i] = fields.get(i); } return result; }
/**
 * Write the NORMALIZE section.
 *
 * <p>First the "CONFIG" subsection is saved through {@code saveSubSection},
 * then a "RANGE" subsection is written: one heading line followed by one line
 * per normalized field holding its name, io direction, time slice, action
 * keyword, normalized high and normalized low.
 *
 * @param out The output file.
 * @throws AnalystError If a field reports an action that has no keyword here.
 */
private void saveNormalize(final EncogWriteHelper out) {
    saveSubSection(out, "NORMALIZE", "CONFIG");

    out.addSubSection("RANGE");
    final String[] headings = {"name", "io", "timeSlice", "action", "high", "low"};
    for (final String heading : headings) {
        out.addColumn(heading);
    }
    out.writeLine();

    for (final AnalystField normalized : this.script.getNormalize().getNormalizedFields()) {
        out.addColumn(normalized.getName());
        out.addColumn(normalized.isInput() ? "input" : "output");
        out.addColumn(normalized.getTimeSlice());

        // translate the action into the keyword stored in the file
        final String keyword;
        switch (normalized.getAction()) {
            case Ignore:
                keyword = "ignore";
                break;
            case Normalize:
                keyword = "range";
                break;
            case PassThrough:
                keyword = "pass";
                break;
            case OneOf:
                keyword = "oneof";
                break;
            case Equilateral:
                keyword = "equilateral";
                break;
            case SingleField:
                keyword = "single";
                break;
            default:
                throw new AnalystError("Unknown action: " + normalized.getAction());
        }
        out.addColumn(keyword);

        out.addColumn(normalized.getNormalizedHigh());
        out.addColumn(normalized.getNormalizedLow());
        out.writeLine();
    }
}
/**
 * Produce the report.
 *
 * <p>Calls {@code analyzeFile()} and then renders one HTML document titled
 * "Encog Analyst Report" with these sections, in order: "General Statistics",
 * "Field Ranges", "Normalization", "Machine Learning" and "Files". Each
 * section is rendered by its own private helper.
 *
 * @return The report, as returned by {@code HTMLReport.toString()}.
 */
public String produceReport() {
    final HTMLReport report = new HTMLReport();

    // NOTE(review): presumably refreshes rowCount/missingCount before they are
    // printed below - confirm against analyzeFile()
    analyzeFile();

    report.beginHTML();
    report.title("Encog Analyst Report");
    report.beginBody();

    appendGeneralStatisticsSection(report);
    appendFieldRangesSection(report);
    appendNormalizationSection(report);
    appendMachineLearningSection(report);
    appendFilesSection(report);

    report.endBody();
    report.endHTML();

    return report.toString();
}

/**
 * Render the "General Statistics" section: total and missing row counts.
 *
 * @param report The report being built.
 */
private void appendGeneralStatisticsSection(final HTMLReport report) {
    report.h1("General Statistics");
    report.beginTable();
    report.tablePair("Total row count", Format.formatInteger(this.rowCount));
    report.tablePair("Missing row count", Format.formatInteger(this.missingCount));
    report.endTable();
}

/**
 * Render the "Field Ranges" section: one row per data field of the script,
 * followed by a nested class-member table for fields that have class members.
 *
 * @param report The report being built.
 */
private void appendFieldRangesSection(final HTMLReport report) {
    report.h1("Field Ranges");
    report.beginTable();

    report.beginRow();
    report.header("Name");
    report.header("Class?");
    report.header("Complete?");
    report.header("Int?");
    report.header("Real?");
    report.header("Max");
    report.header("Min");
    report.header("Mean");
    report.header("Standard Deviation");
    report.endRow();

    for (final DataField df : this.analyst.getScript().getFields()) {
        report.beginRow();
        report.cell(df.getName());
        report.cell(Format.formatYesNo(df.isClass()));
        report.cell(Format.formatYesNo(df.isComplete()));
        report.cell(Format.formatYesNo(df.isInteger()));
        report.cell(Format.formatYesNo(df.isReal()));
        report.cell(Format.formatDouble(df.getMax(), FIVE_SPAN));
        report.cell(Format.formatDouble(df.getMin(), FIVE_SPAN));
        report.cell(Format.formatDouble(df.getMean(), FIVE_SPAN));
        report.cell(Format.formatDouble(df.getStandardDeviation(), FIVE_SPAN));
        report.endRow();

        if (df.getClassMembers().size() > 0) {
            appendClassMembersRow(report, df);
        }
    }

    report.endTable();
}

/**
 * Render one extra row holding a nested table of the field's class members
 * (code, name and count).
 *
 * @param report The report being built.
 * @param df The data field whose class members are listed.
 */
private void appendClassMembersRow(final HTMLReport report, final DataField df) {
    report.beginRow();
    // filler cell in the first column; the nested table follows it
    report.cell(" ");
    report.beginTableInCell(EIGHT_SPAN);

    report.beginRow();
    report.header("Code");
    report.header("Name");
    report.header("Count");
    report.endRow();

    for (final AnalystClassItem item : df.getClassMembers()) {
        report.beginRow();
        report.cell(item.getCode());
        report.cell(item.getName());
        report.cell(Format.formatInteger(item.getCount()));
        report.endRow();
    }

    report.endTableInCell();
    report.endRow();
}

/**
 * Render the "Normalization" section: name, action, high and low of every
 * normalized field.
 *
 * @param report The report being built.
 */
private void appendNormalizationSection(final HTMLReport report) {
    report.h1("Normalization");
    report.beginTable();

    report.beginRow();
    report.header("Name");
    report.header("Action");
    report.header("High");
    report.header("Low");
    report.endRow();

    for (final AnalystField item :
            this.analyst.getScript().getNormalize().getNormalizedFields()) {
        report.beginRow();
        report.cell(item.getName());
        report.cell(item.getAction().toString());
        report.cell(Format.formatDouble(item.getNormalizedHigh(), FIVE_SPAN));
        report.cell(Format.formatDouble(item.getNormalizedLow(), FIVE_SPAN));
        report.endRow();
    }

    report.endTable();
}

/**
 * Render the "Machine Learning" section: the configured type, architecture
 * and machine learning file properties.
 *
 * @param report The report being built.
 */
private void appendMachineLearningSection(final HTMLReport report) {
    report.h1("Machine Learning");
    report.beginTable();

    report.beginRow();
    report.header("Name");
    report.header("Value");
    report.endRow();

    final String type =
            this.analyst.getScript().getProperties()
                    .getPropertyString(ScriptProperties.ML_CONFIG_TYPE);
    final String architecture =
            this.analyst.getScript().getProperties()
                    .getPropertyString(ScriptProperties.ML_CONFIG_ARCHITECTURE);
    final String resourceFile =
            this.analyst.getScript().getProperties()
                    .getPropertyString(ScriptProperties.ML_CONFIG_MACHINE_LEARNING_FILE);

    report.tablePair("Type", type);
    report.tablePair("Architecture", architecture);
    report.tablePair("Machine Learning File", resourceFile);

    report.endTable();
}

/**
 * Render the "Files" section: every filename key of the script properties
 * with the filename it maps to.
 *
 * @param report The report being built.
 */
private void appendFilesSection(final HTMLReport report) {
    report.h1("Files");
    report.beginTable();

    report.beginRow();
    report.header("Name");
    report.header("Filename");
    report.endRow();

    for (final String key : this.analyst.getScript().getProperties().getFilenames()) {
        final String value = this.analyst.getScript().getProperties().getFilename(key);
        report.beginRow();
        report.cell(key);
        report.cell(value);
        report.endRow();
    }

    report.endTable();
}