public static void generateTrainingDataFromFile( String fileLocation) // Requires that the original file had the metadata and requires that this // file is formated the same in first sheet { testDataLL = (LinkedList<String[]>) dataLL.clone(); actualClassifications = (LinkedList<String>) classificationsLL.clone(); FileInputStream file; try { file = new FileInputStream(new File(fileLocation)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; String classification = ""; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classification = cell.toString(); offset++; } } // Even though data and classifications are not really used add it onto the end so it is // still complete for in the event they end up being used in a later version dataLL.add(data); classificationsLL.add(classification); trainingDataLL.add(data); knownClassifications.add(classification); // Check to see if we have seen that classification yet int occurrences = 0; for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) { if (classificationTypes.get(i).compareTo(classification) == 0) { occurrences = 1; } } if (occurrences == 0) { classificationTypes.add(classification); } } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }
public static void readExcelFile(String fileName) { FileInputStream file; try { file = new FileInputStream(new File(fileName)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet // Set just in case metadata is incomplete or malformed classificationLocation = sheet1.getRow(0).getPhysicalNumberOfCells() - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells // does not Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet // Loop based on number of attribute names for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) { String[] metadata = new String[METADATASIZE]; // Construct metadata Row currRow = sheet2.getRow(0); // This should be a row of names metadata[0] = currRow.getCell(i).toString(); currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous) metadata[1] = currRow.getCell(i).toString(); currRow = sheet2.getRow(2); // This should say which one is the classifier if (currRow.getCell(i) == null || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) { metadata[2] = "attribute"; } else { metadata[2] = "classifier"; classificationLocation = i; } metadataLL.add(metadata); } for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classificationsLL.add(cell.toString()); // Moved to generate training data so that we do not get possible classifications from // unknown data since some denote unknown by saying ? // //Check to see if we have seen it yet // // int occurrences = 0; // for(int i = 0; i < classificationTypes.size(); i++) // { // if(classificationTypes.get(i).compareTo(cell.toString()) == 0) // { // occurrences++; // } // } // if(occurrences == 0) // { // classificationTypes.add(cell.toString()); // } offset++; } } dataLL.add(data); // classCount = temp.length; } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }