Esempio n. 1
0
  /**
   * Reads the header of a data set, i.e. every line the parser returns before the {@code @data}
   * marker (compared case-insensitively after trimming).
   *
   * <p>Every header line is appended to {@code header}. Lines containing one of the keywords
   * below (matched case-insensitively, anywhere in the line) are processed further:
   *
   * <ul>
   *   <li>{@code @relation}: the line, with the keyword removed, is passed to {@code
   *       Attributes.setRelationName} (train mode only).
   *   <li>{@code @attribute}: the line is passed to {@code insertAttribute} (train mode only).
   *   <li>{@code @inputs} / {@code @outputs}: the text after the keyword is passed to {@code
   *       insertInputOutput} (train mode only).
   * </ul>
   *
   * <p>{@code attHeader} is set to the header text accumulated when an {@code @inputs} line is
   * met; failing that, when the first {@code @outputs} line is met; failing that, to the whole
   * header. Finally {@code processInputsAndOutputs} is called with the collected names.
   *
   * <p>If the parser runs out of lines before {@code @data} is seen, the header is considered
   * finished at that point instead of failing with a {@code NullPointerException}.
   *
   * @param parser is the parser of the data set
   * @param isTrain is a boolean indicating if this is a train set (and so parameters information
   *     must be read) or a test set (parameters information has not to be read).
   */
  public void parseHeader(InstanceParser parser, boolean isTrain) {

    // Names collected from the @inputs / @outputs lines (filled by insertInputOutput).
    Vector inputAttrNames = new Vector();
    Vector outputAttrNames = new Vector();

    boolean inputsDef = false;
    boolean outputsDef = false;

    String line, aux;
    header = "";

    int lineCount = 0;

    attHeader = null;

    while (true) {
      line = parser.getLine();
      if (line == null) {
        // getLine() signals end of input with null; stop here rather than dereferencing it.
        System.out.println(" >> WARNING: end of file reached before @data was found.");
        break;
      }
      line = line.trim();
      if (line.equalsIgnoreCase("@data")) {
        break;
      }

      lineCount++;

      // Lower-case once, with a fixed locale, so keyword matching does not depend on the
      // platform default (e.g. Turkish dotless-i rules would break "@INPUTS").
      String lowered = line.toLowerCase(java.util.Locale.ROOT);

      if (lowered.indexOf("@relation") != -1) {
        // (?i) keeps the removal consistent with the case-insensitive detection above.
        if (isTrain) Attributes.setRelationName(line.replaceAll("(?i)@relation", ""));
      }

      if (lowered.indexOf("@attribute") != -1) {
        if (isTrain) insertAttribute(line);
      }

      if (lowered.indexOf("@inputs") != -1) {
        attHeader = header;
        inputsDef = true;

        // Skips "@inputs" plus one separator character. A bare "@inputs" line is only 7
        // characters long, so guard the offset and treat it as an empty list.
        aux = line.length() > 8 ? line.substring(8) : "";

        if (isTrain) insertInputOutput(aux, lineCount, inputAttrNames, "inputs", isTrain);
      }

      if (lowered.indexOf("@outputs") != -1) {
        if (attHeader == null) attHeader = header;
        outputsDef = true;

        // The line contains the 8-character keyword, so this offset is always in range.
        aux = line.substring(8);
        if (isTrain) insertInputOutput(aux, lineCount, outputAttrNames, "outputs", isTrain);

        System.out.println(" >> Size of the output is: " + outputAttrNames.size());
      }
      header += line + "\n";
    }
    if (attHeader == null) attHeader = header;

    processInputsAndOutputs(isTrain, inputsDef, outputsDef, outputAttrNames, inputAttrNames);
  } // end parseHeader
Esempio n. 2
0
  /**
   * Loads a complete data set file into memory.
   *
   * <p>The header is handled by {@code parseHeader}; every remaining line returned by the parser
   * is turned into an {@code Instance} and the result is stored in {@code instanceSet}. When the
   * data set is a train set with exactly one output attribute, the attribute statistics are
   * initialised before the instances are read and finished once they have been loaded.
   *
   * @param fileName is the database file name.
   * @param isTrain is a flag that indicates whether the database is a train set or a test set.
   * @throws DatasetException if there is any semantic error in the input file; it is raised
   *     after all the errors recorded in {@code errorLogger} have been printed.
   * @throws HeaderFormatException if there is any lexical or syntactic error in the header of
   *     the input file
   */
  public void readSet(String fileName, boolean isTrain)
      throws DatasetException, HeaderFormatException {
    System.out.println("Opening the file: " + fileName + ".");

    // A fresh error keeper is installed before anything is parsed.
    errorLogger = new FormatErrorKeeper();

    // Header first: @relation, @attribute, @inputs and @outputs.
    InstanceParser parser = new InstanceParser(fileName, isTrain);
    parseHeader(parser, isTrain);

    System.out.println(
        " The number of output attributes is: " + Attributes.getOutputNumAttributes());

    // Statistics are only prepared for train sets with a single output attribute.
    if (isTrain && Attributes.getOutputNumAttributes() == 1) {
      Attributes.initStatistics();
    }

    // Instances are gathered in a growable buffer because their number is not known up front.
    System.out.println("\n\n  > Reading the data ");
    Vector loaded = new Vector(1000, 100000);
    for (String row = parser.getLine(); row != null; row = parser.getLine()) {
      // The third argument is the position the new instance will occupy.
      loaded.addElement(new Instance(row, isTrain, loaded.size()));
    }

    // Move the buffered instances into the fixed-size array.
    System.out.println("    > Number of instances read: " + loaded.size());
    instanceSet = new Instance[loaded.size()];
    loaded.copyInto(instanceSet);

    // Any format error recorded so far aborts the load after being reported.
    int errorCount = errorLogger.getNumErrors();
    if (errorCount > 0) {
      String summary =
          "There has been " + errorLogger.getAllErrors().size() + " errors in the Dataset format";
      System.out.println(summary + ".");

      int idx = 0;
      while (idx < errorCount) {
        errorLogger.getError(idx).print();
        idx++;
      }
      throw new DatasetException(summary, errorLogger.getAllErrors());
    }

    System.out.println(
        "\n  > Finishing the statistics: (isTrain)"
            + isTrain
            + ", (# out attributes)"
            + Attributes.getOutputNumAttributes());

    // Same condition as the initialisation above.
    if (isTrain && Attributes.getOutputNumAttributes() == 1) {
      Attributes.finishStatistics();
    }

    System.out.println("  >> File LOADED CORRECTLY!!");
  } // end readSet