/** * Reads schema from the KEEL file * * @param jobFilename Name of the KEEL dataset file */ private static byte[] readSchema(String fileName) throws IOException, DatasetException { KeelDataSet dataset = new KeelDataSet(fileName); dataset.open(); File file = new File(fileName); List<String> inputIds = new ArrayList<String>(); List<String> outputIds = new ArrayList<String>(); Reader reader = new BufferedReader(new FileReader(file)); String line = ((BufferedReader) reader).readLine(); line = line.replace("real[", "real ["); line = line.replace("integer[", "integer ["); line = line.replace("{", " {"); StringTokenizer elementLine = new StringTokenizer(line); String element = elementLine.nextToken(); while (!element.equalsIgnoreCase("@data")) { if (element.equalsIgnoreCase("@inputs")) { while (elementLine.hasMoreTokens()) { StringTokenizer commaTokenizer = new StringTokenizer(elementLine.nextToken(), ","); while (commaTokenizer.hasMoreTokens()) inputIds.add(commaTokenizer.nextToken()); } } else if (element.equalsIgnoreCase("@outputs")) { while (elementLine.hasMoreTokens()) { StringTokenizer commaTokenizer = new StringTokenizer(elementLine.nextToken(), ","); while (commaTokenizer.hasMoreTokens()) outputIds.add(commaTokenizer.nextToken()); } } // Next line of the file line = ((BufferedReader) reader).readLine(); while (line.startsWith("%") || line.equalsIgnoreCase("")) line = ((BufferedReader) reader).readLine(); line = line.replace("real[", "real ["); line = line.replace("integer[", "integer ["); line = line.replace("{", " {"); elementLine = new StringTokenizer(line); element = elementLine.nextToken(); } IMetadata metadata = dataset.getMetadata(); byte[] schema = new byte[metadata.numberOfAttributes()]; if (inputIds.isEmpty() || outputIds.isEmpty()) { for (int i = 0; i < schema.length; i++) { if (i != (schema.length - 1)) schema[i] = 1; else { IAttribute outputAttribute = metadata.getAttribute(i); schema[i] = 2; consoleReporter.setOutputAttribute(outputAttribute); } } } else { for (int i = 0; i < schema.length; i++) { if (inputIds.contains(metadata.getAttribute(i).getName())) schema[i] = 1; else if (outputIds.contains(metadata.getAttribute(i).getName())) { IAttribute outputAttribute = metadata.getAttribute(i); schema[i] = 2; consoleReporter.setOutputAttribute(outputAttribute); } else schema[i] = -1; } } StringBuffer header = new StringBuffer(); header.append("@relation " + dataset.getName() + "\n"); for (int i = 0; i < metadata.numberOfAttributes(); i++) { IAttribute attribute = metadata.getAttribute(i); header.append("@attribute " + attribute.getName() + " "); if (attribute.getType() == AttributeType.Categorical) { CategoricalAttribute catAtt = (CategoricalAttribute) attribute; Interval interval = catAtt.intervalValues(); header.append("{"); for (int j = (int) interval.getLeft(); j <= interval.size() + 1; j++) { header.append(catAtt.show(j) + (j != interval.size() + 1 ? "," : "}\n")); } } else if (attribute.getType() == AttributeType.IntegerNumerical) { IntegerNumericalAttribute intAtt = (IntegerNumericalAttribute) attribute; header.append( "integer[" + (int) intAtt.intervalValues().getLeft() + "," + (int) intAtt.intervalValues().getRight() + "]\n"); } else if (attribute.getType() == AttributeType.DoubleNumerical) { RealNumericalAttribute doubleAtt = (RealNumericalAttribute) attribute; header.append( "real[" + doubleAtt.intervalValues().getLeft() + "," + doubleAtt.intervalValues().getRight() + "]\n"); } } header.append("@data\n"); consoleReporter.setHeader(header.toString()); dataset.close(); return schema; }