Ejemplo n.º 1
0
  /**
   * Reads the attribute schema from a KEEL dataset file and publishes the dataset header.
   *
   * <p>The file header is scanned up to the {@code @data} marker to collect the attribute names
   * listed after {@code @inputs} and {@code @outputs}. Every attribute reported by the dataset
   * metadata then receives a code in the returned array: {@code 1} if it is listed as an input,
   * {@code 2} if it is listed as an output and {@code -1} if it is in neither list. When either
   * list is empty, every attribute is treated as an input except the last one, which becomes the
   * output.
   *
   * <p>Side effects: each output attribute and a regenerated textual header are handed to
   * {@code consoleReporter}.
   *
   * @param fileName Name of the KEEL dataset file
   * @return one schema code per attribute, in metadata order
   * @throws IOException if the file cannot be read or ends before the {@code @data} marker
   * @throws DatasetException propagated from the dataset operations (open, metadata, close)
   */
  private static byte[] readSchema(String fileName) throws IOException, DatasetException {

    KeelDataSet dataset = new KeelDataSet(fileName);
    dataset.open();

    try {
      List<String> inputIds = new ArrayList<String>();
      List<String> outputIds = new ArrayList<String>();

      // Scan the header for the @inputs / @outputs declarations; stop at @data.
      BufferedReader reader = new BufferedReader(new FileReader(fileName));
      try {
        StringTokenizer elementLine = readHeaderTokens(reader, fileName);
        String element = elementLine.nextToken();

        while (!element.equalsIgnoreCase("@data")) {
          if (element.equalsIgnoreCase("@inputs")) {
            addCommaSeparatedIds(elementLine, inputIds);
          } else if (element.equalsIgnoreCase("@outputs")) {
            addCommaSeparatedIds(elementLine, outputIds);
          }

          // Next meaningful line of the file
          elementLine = readHeaderTokens(reader, fileName);
          element = elementLine.nextToken();
        }
      } finally {
        // Always release the file handle, even when the header is malformed.
        reader.close();
      }

      IMetadata metadata = dataset.getMetadata();
      byte[] schema = new byte[metadata.numberOfAttributes()];

      if (inputIds.isEmpty() || outputIds.isEmpty()) {
        // No explicit declaration: last attribute is the output, all others are inputs.
        for (int i = 0; i < schema.length; i++) {
          if (i != (schema.length - 1)) {
            schema[i] = 1;
          } else {
            IAttribute outputAttribute = metadata.getAttribute(i);
            schema[i] = 2;
            consoleReporter.setOutputAttribute(outputAttribute);
          }
        }
      } else {
        for (int i = 0; i < schema.length; i++) {
          if (inputIds.contains(metadata.getAttribute(i).getName())) {
            schema[i] = 1;
          } else if (outputIds.contains(metadata.getAttribute(i).getName())) {
            IAttribute outputAttribute = metadata.getAttribute(i);
            schema[i] = 2;
            consoleReporter.setOutputAttribute(outputAttribute);
          } else {
            // Attribute declared in the file but used neither as input nor as output.
            schema[i] = -1;
          }
        }
      }

      // Rebuild a KEEL-style header from the metadata for the reporter.
      StringBuilder header = new StringBuilder();
      header.append("@relation " + dataset.getName() + "\n");
      for (int i = 0; i < metadata.numberOfAttributes(); i++) {
        IAttribute attribute = metadata.getAttribute(i);
        header.append("@attribute " + attribute.getName() + " ");
        if (attribute.getType() == AttributeType.Categorical) {
          CategoricalAttribute catAtt = (CategoricalAttribute) attribute;

          Interval interval = catAtt.intervalValues();

          header.append("{");
          // NOTE(review): the upper bound is interval.size() + 1 regardless of getLeft();
          // kept as in the original - confirm against Interval/CategoricalAttribute semantics.
          for (int j = (int) interval.getLeft(); j <= interval.size() + 1; j++) {
            header.append(catAtt.show(j) + (j != interval.size() + 1 ? "," : "}\n"));
          }
        } else if (attribute.getType() == AttributeType.IntegerNumerical) {
          IntegerNumericalAttribute intAtt = (IntegerNumericalAttribute) attribute;
          header.append(
              "integer["
                  + (int) intAtt.intervalValues().getLeft()
                  + ","
                  + (int) intAtt.intervalValues().getRight()
                  + "]\n");
        } else if (attribute.getType() == AttributeType.DoubleNumerical) {
          RealNumericalAttribute doubleAtt = (RealNumericalAttribute) attribute;
          header.append(
              "real["
                  + doubleAtt.intervalValues().getLeft()
                  + ","
                  + doubleAtt.intervalValues().getRight()
                  + "]\n");
        }
      }
      header.append("@data\n");
      consoleReporter.setHeader(header.toString());

      return schema;
    } finally {
      // Close the dataset on every path, not only on success.
      dataset.close();
    }
  }

  /**
   * Returns the tokens of the next header line that is neither a {@code %} comment nor blank.
   *
   * <p>Before tokenizing, a space is inserted in front of {@code [} after {@code real} /
   * {@code integer} and in front of every <code>{</code>, so that type and range become separate
   * tokens.
   *
   * @param reader open reader positioned inside the file header
   * @param fileName file name, used only for the error message
   * @return a tokenizer holding at least one token
   * @throws IOException on read failure or if the end of the file is reached
   */
  private static StringTokenizer readHeaderTokens(BufferedReader reader, String fileName)
      throws IOException {
    while (true) {
      String line = reader.readLine();
      if (line == null) {
        throw new IOException("Unexpected end of file before @data in " + fileName);
      }
      if (line.startsWith("%")) {
        continue;
      }
      line = line.replace("real[", "real [");
      line = line.replace("integer[", "integer [");
      line = line.replace("{", " {");
      StringTokenizer tokens = new StringTokenizer(line);
      if (tokens.hasMoreTokens()) {
        return tokens;
      }
      // Empty or whitespace-only line: keep reading.
    }
  }

  /**
   * Appends to {@code ids} every comma-separated identifier found in the remaining tokens.
   *
   * @param tokens remaining whitespace-separated tokens of an {@code @inputs}/{@code @outputs} line
   * @param ids list receiving the identifiers
   */
  private static void addCommaSeparatedIds(StringTokenizer tokens, List<String> ids) {
    while (tokens.hasMoreTokens()) {
      StringTokenizer commaTokenizer = new StringTokenizer(tokens.nextToken(), ",");
      while (commaTokenizer.hasMoreTokens()) {
        ids.add(commaTokenizer.nextToken());
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Configure the execution of the algorithm.
   *
   * <p>Loads the job properties, derives the schema from the training file, builds the algorithm
   * configuration in memory, instantiates and configures the algorithm, loads the training and
   * test data into its evaluator and registers the console reporter as a listener.
   *
   * <p>Properties read: {@code inputData} (the first token is skipped; the second and third are
   * the training and test file names, each stripped of its first and last character),
   * {@code outputData} (three tokens, each stripped the same way: training results, test results
   * and best-model file), {@code Generations}, {@code Transfer}, {@code Hidden_nodes} and
   * {@code seed}.
   *
   * <p>If the properties file or the schema cannot be read, the stack trace is printed and the
   * JVM exits with status 1.
   *
   * @param jobFilename Name of the KEEL file with properties of the execution
   */
  @SuppressWarnings("unchecked")
  private static void configureJob(String jobFilename) {

    Properties props = new Properties();

    try {
      InputStream paramsFile = new FileInputStream(jobFilename);
      try {
        props.load(paramsFile);
      } finally {
        // Release the handle even when parsing the properties fails.
        paramsFile.close();
      }
    } catch (IOException ioe) {
      ioe.printStackTrace();
      // Non-zero status: the job could not be configured.
      System.exit(1);
    }

    // Files training and test
    String trainFile;
    String testFile;
    StringTokenizer tokenizer = new StringTokenizer(props.getProperty("inputData"));
    tokenizer.nextToken();
    trainFile = tokenizer.nextToken();
    trainFile = trainFile.substring(1, trainFile.length() - 1);
    testFile = tokenizer.nextToken();
    testFile = testFile.substring(1, testFile.length() - 1);

    // Schema of the training file. Without it the data cannot be read below, so a failure
    // aborts here instead of continuing with a null schema.
    byte[] schema = null;
    try {
      schema = readSchema(trainFile);
    } catch (IOException e) {
      e.printStackTrace();
      System.exit(1);
    } catch (DatasetException e) {
      e.printStackTrace();
      System.exit(1);
    }

    // Algorithm auxiliar configuration
    XMLConfiguration algConf = new XMLConfiguration();
    algConf.setRootElementName("algorithm");
    algConf.addProperty("population-size", 1000);
    algConf.addProperty("max-of-generations", Integer.parseInt(props.getProperty("Generations")));
    algConf.addProperty("creation-ratio", 10.0);
    algConf.addProperty("percentage-second-mutator", 10);
    algConf.addProperty("max-generations-without-improving-mean", 20);
    algConf.addProperty("max-generations-without-improving-best", 20);
    algConf.addProperty("fitness-difference", 0.0000001);
    algConf.addProperty(
        "species[@type]", "keel.Algorithms.Neural_Networks.NNEP_Common.NeuralNetIndividualSpecies");
    algConf.addProperty(
        "species.neural-net-type",
        "keel.Algorithms.Neural_Networks.NNEP_Clas.neuralnet.NeuralNetClassifier");

    // The transfer function selects the hidden-layer type here and the input scaling further down.
    boolean productUnit = props.getProperty("Transfer").equals("Product_Unit");
    if (productUnit) {
      algConf.addProperty(
          "species.hidden-layer[@type]",
          "keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.ExpLayer");
      algConf.addProperty("species.hidden-layer[@biased]", false);
      algConf.addProperty("evaluator[@log-input-data]", true);
    } else {
      algConf.addProperty(
          "species.hidden-layer[@type]",
          "keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.SigmLayer");
      algConf.addProperty("species.hidden-layer[@biased]", true);
    }

    // Minimum is a third and initial maximum is half of the configured neurons, never below 1.
    int neurons = Integer.parseInt(props.getProperty("Hidden_nodes"));
    algConf.addProperty(
        "species.hidden-layer.minimum-number-of-neurons", (neurons / 3) != 0 ? (neurons / 3) : 1);
    algConf.addProperty(
        "species.hidden-layer.initial-maximum-number-of-neurons",
        (neurons / 2) != 0 ? (neurons / 2) : 1);
    algConf.addProperty("species.hidden-layer.maximum-number-of-neurons", neurons);
    algConf.addProperty(
        "species.hidden-layer.initiator-of-links",
        "keel.Algorithms.Neural_Networks.NNEP_Common.initiators.RandomInitiator");
    algConf.addProperty(
        "species.hidden-layer.weight-range[@type]", "net.sf.jclec.util.range.Interval");
    algConf.addProperty("species.hidden-layer.weight-range[@closure]", "closed-closed");
    algConf.addProperty("species.hidden-layer.weight-range[@left]", -5.0);
    algConf.addProperty("species.hidden-layer.weight-range[@right]", 5.0);
    algConf.addProperty(
        "species.output-layer[@type]",
        "keel.Algorithms.Neural_Networks.NNEP_Common.neuralnet.LinearLayer");
    algConf.addProperty("species.output-layer[@biased]", true);
    algConf.addProperty(
        "species.output-layer.initiator-of-links",
        "keel.Algorithms.Neural_Networks.NNEP_Common.initiators.RandomInitiator");
    algConf.addProperty(
        "species.output-layer.weight-range[@type]", "net.sf.jclec.util.range.Interval");
    algConf.addProperty("species.output-layer.weight-range[@closure]", "closed-closed");
    algConf.addProperty("species.output-layer.weight-range[@left]", -5.0);
    algConf.addProperty("species.output-layer.weight-range[@right]", 5.0);
    algConf.addProperty(
        "evaluator[@type]",
        "keel.Algorithms.Neural_Networks.NNEP_Clas.problem.classification.softmax.SoftmaxClassificationProblemEvaluator");

    algConf.addProperty("evaluator[@normalize-data]", true);

    algConf.addProperty(
        "evaluator.error-function",
        "keel.Algorithms.Neural_Networks.NNEP_Clas.problem.errorfunctions.LogisticErrorFunction");

    algConf.addProperty("evaluator.input-interval[@closure]", "closed-closed");

    // Input interval is [1, 2] for product units and [0.1, 0.9] otherwise.
    if (productUnit) {
      algConf.addProperty("evaluator.input-interval[@left]", 1.0);
      algConf.addProperty("evaluator.input-interval[@right]", 2.0);
    } else {
      algConf.addProperty("evaluator.input-interval[@left]", 0.1);
      algConf.addProperty("evaluator.input-interval[@right]", 0.9);
    }

    algConf.addProperty("evaluator.output-interval[@closure]", "closed-closed");

    algConf.addProperty("evaluator.output-interval[@left]", 0.0);
    algConf.addProperty("evaluator.output-interval[@right]", 1.0);

    algConf.addProperty(
        "provider[@type]", "keel.Algorithms.Neural_Networks.NNEP_Common.NeuralNetCreator");
    algConf.addProperty(
        "mutator1[@type]",
        "keel.Algorithms.Neural_Networks.NNEP_Common.mutators.structural.StructuralMutator");
    algConf.addProperty("mutator1.temperature-exponent[@value]", 1.0);
    algConf.addProperty("mutator1.significative-weigth[@value]", 0.0000001);
    algConf.addProperty("mutator1.neuron-ranges.added[@min]", 1);
    algConf.addProperty("mutator1.neuron-ranges.added[@max]", 2);
    algConf.addProperty("mutator1.neuron-ranges.deleted[@min]", 1);
    algConf.addProperty("mutator1.neuron-ranges.deleted[@max]", 2);
    algConf.addProperty("mutator1.links-ranges[@relative]", true);
    algConf.addProperty("mutator1.links-ranges.percentages[@hidden]", 30);
    algConf.addProperty("mutator1.links-ranges.percentages[@output]", 5);
    algConf.addProperty(
        "mutator2[@type]",
        "keel.Algorithms.Neural_Networks.NNEP_Common.mutators.parametric.ParametricSRMutator");
    algConf.addProperty("mutator2.temperature-exponent[@value]", 0.0);
    algConf.addProperty("mutator2.amplitude[@value]", 5.0);
    algConf.addProperty("mutator2.fitness-difference[@value]", 0.0000001);
    algConf.addProperty("mutator2.initial-alpha-values[@input]", 0.5);
    algConf.addProperty("mutator2.initial-alpha-values[@output]", 1.0);
    algConf.addProperty(
        "rand-gen-factory[@type]",
        "keel.Algorithms.Neural_Networks.NNEP_Common.util.random.RanNnepFactory");
    algConf.addProperty("rand-gen-factory[@seed]", Integer.parseInt(props.getProperty("seed")));

    // Neural Net Algorithm
    algorithm = new CCRElitistNeuralNetAlgorithm();

    algorithm.configure(algConf);

    // Read data
    ProblemEvaluator evaluator = (ProblemEvaluator) algorithm.getEvaluator();

    evaluator.readData(schema, new KeelDataSet(trainFile), new KeelDataSet(testFile));

    // The network gets one output fewer than the training data reports.
    // NOTE(review): presumably one class acts as the softmax reference - confirm.
    ((NeuralNetIndividualSpecies) algorithm.getSpecies())
        .setNOfInputs(evaluator.getTrainData().getNofinputs());
    ((NeuralNetIndividualSpecies) algorithm.getSpecies())
        .setNOfOutputs(evaluator.getTrainData().getNofoutputs() - 1);

    // Read output files
    tokenizer = new StringTokenizer(props.getProperty("outputData"));
    String trainResultFile = tokenizer.nextToken();
    trainResultFile = trainResultFile.substring(1, trainResultFile.length() - 1);
    consoleReporter.setTrainResultFile(trainResultFile);
    String testResultFile = tokenizer.nextToken();
    testResultFile = testResultFile.substring(1, testResultFile.length() - 1);
    consoleReporter.setTestResultFile(testResultFile);
    String bestModelResultFile = tokenizer.nextToken();
    bestModelResultFile = bestModelResultFile.substring(1, bestModelResultFile.length() - 1);
    consoleReporter.setBestModelResultFile(bestModelResultFile);

    listeners.add(consoleReporter);
  }