/** * It reads the information in the header of the file. It reads relation's name, attributes' * names, and inputs and outputs. * * @param parser is the parser of the data set * @param isTrain is a boolean indicating if this is a train set (and so parameters information * must be read) or a test set (parameters information has not to be read). */ public void parseHeader(InstanceParser parser, boolean isTrain) { // 1. Declaration of variables Vector inputAttrNames = new Vector(); Vector outputAttrNames = new Vector(); boolean inputsDef = false; boolean outputsDef = false; String line, aux; header = ""; int attCount = 0, lineCount = 0; attHeader = null; while (!(line = parser.getLine().trim()).equalsIgnoreCase("@data")) { line = line.trim(); // System.out.println (" > Line read: " + line +"." ); lineCount++; if (line.toLowerCase().indexOf("@relation") != -1) { if (isTrain) Attributes.setRelationName(line.replaceAll("@relation", "")); } if (line.toLowerCase().indexOf("@attribute") != -1) { if (isTrain) insertAttribute(line); attCount++; } if (line.toLowerCase().indexOf("@inputs") != -1) { attHeader = header; inputsDef = true; aux = line.substring(8); if (isTrain) insertInputOutput(aux, lineCount, inputAttrNames, "inputs", isTrain); } if (line.toLowerCase().indexOf("@outputs") != -1) { if (attHeader == null) attHeader = header; outputsDef = true; // System.out.println ( " >>> Defining the output !!!"); aux = line.substring(8); if (isTrain) insertInputOutput(aux, lineCount, outputAttrNames, "outputs", isTrain); System.out.println(" >> Size of the output is: " + outputAttrNames.size()); } header += line + "\n"; } if (attHeader == null) attHeader = header; processInputsAndOutputs(isTrain, inputsDef, outputsDef, outputAttrNames, inputAttrNames); } // end headerParse
/** * This method reads all the information in a DB and load it to memory. * * @param fileName is the database file name. * @param isTrain is a flag that indicate if the database is for a train or for a test. * @throws DatasetException if there is any semantical error in the input file. * @throws HeaderFormatException if there is any lexical or sintactical error in the header of the * input file */ public void readSet(String fileName, boolean isTrain) throws DatasetException, HeaderFormatException { String line; System.out.println("Opening the file: " + fileName + "."); // Parsing the header of the DB. errorLogger = new FormatErrorKeeper(); // Declaring an instance parser InstanceParser parser = new InstanceParser(fileName, isTrain); // Reading information in the header, i.e., @relation, @attribute, @inputs and @outputs parseHeader(parser, isTrain); System.out.println( " The number of output attributes is: " + Attributes.getOutputNumAttributes()); // The attributes statistics are init if we are in train mode. if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.initStatistics(); } // A temporal vector is used to store the instances read. System.out.println("\n\n > Reading the data "); Vector tempSet = new Vector(1000, 100000); while ((line = parser.getLine()) != null) { // System.out.println (" > Data line: " + line ); tempSet.addElement(new Instance(line, isTrain, tempSet.size())); } // The vector of instances is converted to an array of instances. int sizeInstance = tempSet.size(); System.out.println(" > Number of instances read: " + tempSet.size()); instanceSet = new Instance[sizeInstance]; for (int i = 0; i < sizeInstance; i++) { instanceSet[i] = (Instance) tempSet.elementAt(i); } // System.out.println("After converting all instances"); // System.out.println("The error logger has any error: "+errorLogger.getNumErrors()); if (errorLogger.getNumErrors() > 0) { System.out.println( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format."); for (int k = 0; k < errorLogger.getNumErrors(); k++) { errorLogger.getError(k).print(); } throw new DatasetException( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format", errorLogger.getAllErrors()); } System.out.println( "\n > Finishing the statistics: (isTrain)" + isTrain + ", (# out attributes)" + Attributes.getOutputNumAttributes()); // If being on a train dataset, the statistics are finished if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.finishStatistics(); } System.out.println(" >> File LOADED CORRECTLY!!"); } // end of InstanceSet constructor.