/** * Determine the ideal fields. * * @param headerList The headers. * @return The indexes of the ideal fields. */ private int[] determineIdealFields(final CSVHeaders headerList) { int[] result; final String type = getProp().getPropertyString(ScriptProperties.ML_CONFIG_TYPE); // is it non-supervised? if (type.equals(MLMethodFactory.TYPE_SOM)) { result = new int[0]; return result; } final List<Integer> fields = new ArrayList<Integer>(); for (int currentIndex = 0; currentIndex < headerList.size(); currentIndex++) { final String baseName = headerList.getBaseHeader(currentIndex); final int slice = headerList.getSlice(currentIndex); final AnalystField field = getAnalyst().getScript().findNormalizedField(baseName, slice); if (field != null && field.isOutput()) { fields.add(currentIndex); } } // allocate result array result = new int[fields.size()]; for (int i = 0; i < result.length; i++) { result[i] = fields.get(i); } return result; }
/** * Extract fields from a file into a numeric array for machine learning. * * @param analyst The analyst to use. * @param headers The headers for the input data. * @param csv The CSV that holds the input data. * @param outputLength The length of the returned array. * @param skipOutput True if the output should be skipped. * @return The encoded data. */ public static final double[] extractFields( final EncogAnalyst analyst, final CSVHeaders headers, final ReadCSV csv, final int outputLength, final boolean skipOutput) { final double[] output = new double[outputLength]; int outputIndex = 0; for (final AnalystField stat : analyst.getScript().getNormalize().getNormalizedFields()) { stat.init(); if (stat.getAction() == NormalizationAction.Ignore) { continue; } if (stat.isOutput() && skipOutput) { continue; } int index = headers.find(stat.getName()); final String str = csv.get(index).trim(); // is this an unknown value? if (str.equals("?") || str.length() == 0) { HandleMissingValues handler = analyst.getScript().getNormalize().getMissingValues(); double[] d = handler.handleMissing(analyst, stat); // should we skip the entire row if (d == null) { return null; } // copy the returned values in place of the missing values for (int i = 0; i < d.length; i++) { output[outputIndex++] = d[i]; } } else { // known value if (stat.getAction() == NormalizationAction.Normalize) { double d = csv.getFormat().parse(str); d = stat.normalize(d); output[outputIndex++] = d; } else if (stat.getAction() == NormalizationAction.PassThrough) { double d = csv.getFormat().parse(str); output[outputIndex++] = d; } else { final double[] d = stat.encode(str); for (final double element : d) { output[outputIndex++] = element; } } } } return output; }
/** * Determine the input fields. * * @param headerList The headers. * @return The indexes of the input fields. */ private int[] determineInputFields(final CSVHeaders headerList) { final List<Integer> fields = new ArrayList<Integer>(); for (int currentIndex = 0; currentIndex < headerList.size(); currentIndex++) { final String baseName = headerList.getBaseHeader(currentIndex); final int slice = headerList.getSlice(currentIndex); final AnalystField field = getAnalyst().getScript().findNormalizedField(baseName, slice); if (field != null && field.isInput()) { fields.add(currentIndex); } } // allocate result array final int[] result = new int[fields.size()]; for (int i = 0; i < result.length; i++) { result[i] = fields.get(i); } return result; }
/**
 * Write the header line.
 *
 * <p>One double-quoted column name is emitted for each column needed by
 * each normalized field, with the names joined by
 * {@code BasicFile.appendSeparator} using this object's format. The
 * finished line is printed to {@code tw} with a trailing line break.
 *
 * @param tw The output stream.
 */
private void writeHeaders(final PrintWriter tw) {
    final StringBuilder header = new StringBuilder();

    for (final AnalystField field
            : this.analyst.getScript().getNormalize().getNormalizedFields()) {
        final int columns = field.getColumnsNeeded();

        int part = 0;
        while (part < columns) {
            BasicFile.appendSeparator(header, getFormat());
            // The last argument tells tagColumn whether this field spans
            // more than one column.
            header.append('\"')
                    .append(CSVHeaders.tagColumn(
                            field.getName(), part, field.getTimeSlice(), columns > 1))
                    .append('\"');
            part++;
        }
    }

    tw.println(header.toString());
}
/**
 * Append this field's raw-file headings to a header line.
 *
 * <p>One double-quoted heading is appended for each column this field
 * needs. Each heading is the optional prefix followed by the column name
 * from {@code CSVHeaders.tagColumn}, and is preceded by
 * {@code BasicFile.appendSeparator} for the given format.
 *
 * @param line The line to write the raw headings to.
 * @param prefix The prefix to place before each heading, or {@code null}
 *        for no prefix.
 * @param format The format to use.
 */
public final void addRawHeadings(
        final StringBuilder line, final String prefix, final CSVFormat format) {
    final int columns = getColumnsNeeded();
    // A null prefix contributes nothing to the heading.
    final String lead = (prefix == null) ? "" : prefix;

    for (int part = 0; part < columns; part++) {
        // The last argument tells tagColumn whether this field spans more
        // than one column.
        final String tagged =
                CSVHeaders.tagColumn(this.name, part, this.timeSlice, columns > 1);

        BasicFile.appendSeparator(line, format);
        line.append('\"').append(lead).append(tagged).append('\"');
    }
}