/** * This method reads all the information in a DB and load it to memory. * * @param fileName is the database file name. * @param isTrain is a flag that indicate if the database is for a train or for a test. * @throws DatasetException if there is any semantical error in the input file. * @throws HeaderFormatException if there is any lexical or sintactical error in the header of the * input file */ public void readSet(String fileName, boolean isTrain) throws DatasetException, HeaderFormatException { String line; System.out.println("Opening the file: " + fileName + "."); // Parsing the header of the DB. errorLogger = new FormatErrorKeeper(); // Declaring an instance parser InstanceParser parser = new InstanceParser(fileName, isTrain); // Reading information in the header, i.e., @relation, @attribute, @inputs and @outputs parseHeader(parser, isTrain); System.out.println( " The number of output attributes is: " + Attributes.getOutputNumAttributes()); // The attributes statistics are init if we are in train mode. if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.initStatistics(); } // A temporal vector is used to store the instances read. System.out.println("\n\n > Reading the data "); Vector tempSet = new Vector(1000, 100000); while ((line = parser.getLine()) != null) { // System.out.println (" > Data line: " + line ); tempSet.addElement(new Instance(line, isTrain, tempSet.size())); } // The vector of instances is converted to an array of instances. int sizeInstance = tempSet.size(); System.out.println(" > Number of instances read: " + tempSet.size()); instanceSet = new Instance[sizeInstance]; for (int i = 0; i < sizeInstance; i++) { instanceSet[i] = (Instance) tempSet.elementAt(i); } // System.out.println("After converting all instances"); // System.out.println("The error logger has any error: "+errorLogger.getNumErrors()); if (errorLogger.getNumErrors() > 0) { System.out.println( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format."); for (int k = 0; k < errorLogger.getNumErrors(); k++) { errorLogger.getError(k).print(); } throw new DatasetException( "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format", errorLogger.getAllErrors()); } System.out.println( "\n > Finishing the statistics: (isTrain)" + isTrain + ", (# out attributes)" + Attributes.getOutputNumAttributes()); // If being on a train dataset, the statistics are finished if (isTrain && Attributes.getOutputNumAttributes() == 1) { Attributes.finishStatistics(); } System.out.println(" >> File LOADED CORRECTLY!!"); } // end of InstanceSet constructor.