/**
 * Validates the loaded dataset and publishes its dimensions through {@code Parameters}.
 *
 * <p>The dataset must declare exactly one output attribute and that attribute must be nominal;
 * otherwise an error is reported through {@code LogManager.printErr} and the JVM is terminated
 * with exit status 1. On success, {@code Parameters.numClasses} receives the number of nominal
 * values of the output attribute and {@code Parameters.numAttributes} the number of input
 * attributes.
 */
static void checkDataset() {
  Attribute[] outputAttributes = Attributes.getOutputAttributes();

  // Decide which (if any) precondition is violated; the checks are ordered so that
  // outputAttributes[0] is only touched once exactly one output is known to exist.
  String problem = null;
  if (outputAttributes.length != 1) {
    problem = "Only datasets with one output are supported";
  } else if (outputAttributes[0].getType() != Attribute.NOMINAL) {
    problem = "Output attribute should be nominal";
  }

  if (problem != null) {
    LogManager.printErr(problem);
    System.exit(1);
  }

  Attribute classAttribute = outputAttributes[0];
  Parameters.numClasses = classAttribute.getNumNominalValues();
  Parameters.numAttributes = Attributes.getInputAttributes().length;
}
/** * The main method of the class * * @param script Name of the configuration script */ public KNN(Instance[] trainI, Instance[] testI) { // set parameters trainInst = trainI; testInst = testI; trainData = new double[trainInst.length][Parameters.numAttributes]; for (int i = 0; i < trainInst.length; ++i) System.arraycopy( trainInst[i].getAllInputValues(), 0, trainData[i], 0, Parameters.numAttributes); testData = new double[testInst.length][Parameters.numAttributes]; for (int i = 0; i < testInst.length; ++i) System.arraycopy( testInst[i].getAllInputValues(), 0, testData[i], 0, Parameters.numAttributes); k = Parameters.numNeighbors; if (Parameters.distanceType.equalsIgnoreCase("euclidean")) distanceType = EUCLIDEAN; else if (Parameters.distanceType.equalsIgnoreCase("manhattan")) distanceType = MANHATTAN; else if (Parameters.distanceType.equalsIgnoreCase("hvdm")) distanceType = HVDM; // read Data Files ----------------------- inputAtt = Attributes.getInputNumAttributes(); inputs = Attributes.getInputAttributes(); output = Attributes.getOutputAttribute(0); // Normalize the datasets normalizeTrain(); normalizeTest(); // Get the number of classes nClasses = Attributes.getOutputAttribute(0).getNumNominalValues(); // And the number of instances on each class nInstances = new int[nClasses]; Arrays.fill(nInstances, 0); for (int i = 0; i < trainOutput.length; i++) nInstances[trainOutput[i]]++; // Initialization of auxiliary structures if (distanceType == HVDM) { stdDev = new double[inputAtt]; calculateHVDM(); } }
/**
 * Prints the current rule to standard output.
 *
 * <p>Each antecedent is written as {@code (attributeName,nominalValue)}, antecedents are joined
 * with {@code " & "}, and the consequent is written after {@code " -> "} together with the name
 * of the first output attribute. A separator line follows the rule.
 */
public void mostrarRegla() {
  Attribute[] inputAttributes = Attributes.getInputAttributes();
  Attribute[] outputAttributes = Attributes.getOutputAttributes();

  System.out.print("Regla: ");
  for (int pos = 0; pos < this.antecedentes.size(); pos++) {
    // The conjunction symbol goes between consecutive antecedents only.
    if (pos > 0) {
      System.out.print(" & ");
    }
    Atributo_valor condition = antecedentes.get(pos);
    Attribute attribute = inputAttributes[condition.getAtributo()];
    String valueLabel = attribute.getNominalValue(condition.getValor().intValue());
    System.out.print("(" + attribute.getName() + "," + valueLabel + ")");
  }

  System.out.println(" -> (" + outputAttributes[0].getName() + "," + this.consecuente + ")");
  System.out.println("------------------------------------");
}
/**
 * Builds the training and test data matrices and normalizes the training input values.
 *
 * <p>For every training instance the method records which inputs are missing (their value is
 * replaced by 0.0), stores the class index, keeps the raw nominal index / raw real value in
 * {@code nominalTrain} / {@code realTrain}, and scales the value in {@code datosTrain}:
 * nominal values are divided by (number of nominal values - 1), real values are min-max scaled
 * with the attribute's declared bounds. If the scaling yields NaN (zero-width range) the raw
 * value is kept instead.
 *
 * <p>Test instances are only copied: missing inputs are set to 0.0 and the class index is
 * stored; no scaling is applied to {@code datosTest} here.
 *
 * @throws CheckException if the dataset has no output attribute, has more than one output
 *     attribute, or its output attribute is real-valued
 */
protected void normalizar() throws CheckException {
  /* Check that the dataset corresponds to a classification problem */
  int numOutputs = Attributes.getOutputNumAttributes();
  if (numOutputs < 1) {
    throw new CheckException(
        "This dataset has no outputs, so it does not correspond to a classification problem.");
  } else if (numOutputs > 1) {
    throw new CheckException("This dataset has more than one output.");
  }
  if (Attributes.getOutputAttribute(0).getType() == Attribute.REAL) {
    throw new CheckException(
        "This dataset has an output attribute with floating values, so it does not correspond to a classification problem.");
  }

  entradas = Attributes.getInputAttributes();
  salida = Attributes.getOutputAttribute(0);
  nEntradas = Attributes.getInputNumAttributes();

  // The relation name is the second whitespace-separated token of the header.
  StringTokenizer tokens = new StringTokenizer(training.getHeader(), " \n\r");
  tokens.nextToken();
  relation = tokens.nextToken();

  int numInputs = Attributes.getInputNumAttributes();
  int numTrain = training.getNumInstances();

  datosTrain = new double[numTrain][numInputs];
  clasesTrain = new int[numTrain];
  nulosTrain = new boolean[numTrain][numInputs];
  nominalTrain = new int[numTrain][numInputs];
  realTrain = new double[numTrain][numInputs];

  for (int i = 0; i < numTrain; i++) {
    Instance instance = training.getInstance(i);
    boolean[] missing = instance.getInputMissingValues();

    // The row is replaced by the array handed out by the instance.
    // NOTE(review): whether that array is shared with the instance is not visible here.
    datosTrain[i] = instance.getAllInputValues();
    for (int j = 0; j < missing.length; j++) {
      if (missing[j]) {
        datosTrain[i][j] = 0.0;
        nulosTrain[i][j] = true;
      }
    }

    clasesTrain[i] = (int) instance.getAllOutputValues()[0];

    for (int k = 0; k < datosTrain[i].length; k++) {
      Attribute attribute = Attributes.getInputAttribute(k);
      double raw = datosTrain[i][k];

      if (attribute.getType() == Attribute.NOMINAL) {
        nominalTrain[i][k] = (int) raw;
        datosTrain[i][k] /= attribute.getNominalValuesList().size() - 1;
      } else {
        realTrain[i][k] = raw;
        datosTrain[i][k] -= attribute.getMinAttribute();
        datosTrain[i][k] /= attribute.getMaxAttribute() - attribute.getMinAttribute();
      }

      // A zero-width range (single nominal value, or min == max) produces 0/0 = NaN;
      // keep the unscaled value in that case, for nominal and real attributes alike.
      if (Double.isNaN(datosTrain[i][k])) {
        datosTrain[i][k] = raw;
      }
    }
  }

  int numTest = test.getNumInstances();
  datosTest = new double[numTest][numInputs];
  clasesTest = new int[numTest];

  for (int i = 0; i < numTest; i++) {
    Instance instance = test.getInstance(i);
    boolean[] missing = instance.getInputMissingValues();

    datosTest[i] = instance.getAllInputValues();
    for (int j = 0; j < missing.length; j++) {
      if (missing[j]) {
        datosTest[i][j] = 0.0;
      }
    }

    clasesTest[i] = (int) instance.getAllOutputValues()[0];
  }
} // end-method