public void make(File source, DataSet header, DataSet master) throws Exception { this.header = header; this.master = master; if (header == null || master == null) throw new Exception("Dataset is empty"); long t = System.currentTimeMillis(); InputStream inp = new FileInputStream(source); Workbook oldBook = WorkbookFactory.create(inp); Sheet oldSheet = oldBook.getSheetAt(0); Workbook newBook = new HSSFWorkbook(); Sheet newSheet = newBook.createSheet(oldSheet.getSheetName()); init(newBook); process(oldSheet, newSheet); File target = File.createTempFile("libra", ".xls"); target.deleteOnExit(); FileOutputStream fileOut = new FileOutputStream(target); newBook.write(fileOut); fileOut.close(); oldBook.close(); inp.close(); Desktop.getDesktop().open(target); System.out.println(System.currentTimeMillis() - t); }
public static void generateTrainingDataFromFile( String fileLocation) // Requires that the original file had the metadata and requires that this // file is formated the same in first sheet { testDataLL = (LinkedList<String[]>) dataLL.clone(); actualClassifications = (LinkedList<String>) classificationsLL.clone(); FileInputStream file; try { file = new FileInputStream(new File(fileLocation)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; String classification = ""; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classification = cell.toString(); offset++; } } // Even though data and classifications are not really used add it onto the end so it is // still complete for in the event they end up being used in a later version dataLL.add(data); classificationsLL.add(classification); trainingDataLL.add(data); knownClassifications.add(classification); // Check to see if we have seen that classification yet int occurrences = 0; for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) { if (classificationTypes.get(i).compareTo(classification) == 0) { occurrences = 1; } } if (occurrences == 0) { classificationTypes.add(classification); } } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }
@Test public void test_writeFile() { String path = "D:\\test.xlsx"; // String path = "D:\\test.xls"; // String path = "D:/"; // String path = "D:\\"; long startTime = System.currentTimeMillis(); try { ExcelUtil.writeFile(path); } catch (IOException e) { e.printStackTrace(); } System.out.println("cost :" + (System.currentTimeMillis() - startTime)); }
public void readProcessSpreadSheet() { processSpreadSheet = System.getProperty("processSpreadSheet"); System.out.println("processSpreadSheet: " + processSpreadSheet); if (processCodesList.size() == 0) { if (processSpreadSheet != null && !processSpreadSheet.equals("")) { Workbook wb1 = null; try { wb1 = new XSSFWorkbook(processSpreadSheet); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } Sheet sheet = wb1.getSheetAt(0); Row row; Cell cell; int rows; // No of rows rows = sheet.getPhysicalNumberOfRows(); int cols = 0; // No of columns int tmp = 0; // This trick ensures that we get the data properly even if it // doesn't start from first few rows for (int i = 0; i < 10 || i < rows; i++) { row = sheet.getRow(i); if (row != null) { tmp = sheet.getRow(i).getPhysicalNumberOfCells(); // out.println("tmp value"+tmp); if (tmp > cols) { cols = tmp; } } } ProcessDefinition tempProcessCode; for (int r1 = 0; r1 < rows; r1++) { tempProcessCode = new ProcessDefinition(); row = sheet.getRow(r1); if (row != null) { if (row.getCell(0) != null) { for (int counter = 0; counter < cols; counter++) { cell = row.getCell((short) counter); // cell = row.getCell(1); if (counter == 0) { if (cell != null) { tempProcessCode.setProcessName(cell.getStringCellValue()); } else { tempProcessCode.setProcessName(""); } } else if (counter == 1) { if (cell != null) { tempProcessCode.setProcessCode(cell.getStringCellValue()); } else { tempProcessCode.setProcessCode(""); } } else if (counter == 2) { if (cell != null) { tempProcessCode.setIaeaCode(cell.getStringCellValue()); } else { tempProcessCode.setIaeaCode(""); } } else if (counter == 3) { if (cell != null) { tempProcessCode.setProcessDescription(cell.getStringCellValue()); // System.out.println(tempProcessCode.getProcessDescription()); } else { tempProcessCode.setProcessDescription(""); // System.out.println("Process Description EMpty"); } } } } } else { rows++; } processCodesList.add(tempProcessCode); } } } System.out.println(processCodesList.size()); }
public static void writeExcelFile(String fileLocation) { try { FileOutputStream fileOut = new FileOutputStream(fileLocation); HSSFWorkbook workbook = new HSSFWorkbook(); Font bold = workbook.createFont(); // Create font bold.setBoldweight(Font.BOLDWEIGHT_BOLD); // Make font bold CellStyle correctCell = workbook.createCellStyle(); correctCell.setFillForegroundColor(HSSFColor.GREEN.index); correctCell.setFillBackgroundColor(HSSFColor.GREEN.index); correctCell.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle incorrectCell = workbook.createCellStyle(); incorrectCell.setFillForegroundColor(HSSFColor.RED.index); incorrectCell.setFillBackgroundColor(HSSFColor.RED.index); incorrectCell.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle classificationCells = workbook.createCellStyle(); classificationCells.setFillForegroundColor(HSSFColor.YELLOW.index); classificationCells.setFillBackgroundColor(HSSFColor.YELLOW.index); classificationCells.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle attributeNameCells = workbook.createCellStyle(); attributeNameCells.setFont(bold); CellStyle classificationAttributeCell = workbook.createCellStyle(); classificationAttributeCell.setFillForegroundColor(HSSFColor.YELLOW.index); classificationAttributeCell.setFillBackgroundColor(HSSFColor.YELLOW.index); classificationAttributeCell.setFillPattern(CellStyle.SOLID_FOREGROUND); classificationAttributeCell.setFont(bold); Sheet worksheet = workbook.createSheet("Results"); Row currRow = worksheet.createRow(0); for (int attribute = 0; attribute < metadataLL.size() + 1; attribute++) { Cell currCell = currRow.createCell(attribute); if (attribute < metadataLL.size()) { currCell.setCellValue(metadataLL.get(attribute)[0]); if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellStyle(classificationAttributeCell); } else { currCell.setCellStyle(attributeNameCells); } } else { currCell.setCellValue("Guessed Classification"); currCell.setCellStyle(attributeNameCells); } } int correct = 0; int incorrect = 0; for (int node = 0; node < testDataLL.size(); node++) { currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data int classifierCompleted = 0; // Necessary for if data does not end in classifier for (int attribute = 0; attribute < metadataLL.size() + 2; attribute++) // +1 for the row for guessed data +1 for the row that contains { Cell currCell = currRow.createCell(attribute); if (attribute < metadataLL.size()) // Print testingData { if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellValue(actualClassifications.get(node)); currCell.setCellStyle(classificationCells); classifierCompleted++; } else { currCell.setCellValue(testDataLL.get(node)[attribute - classifierCompleted]); } } else if (attribute == metadataLL.size()) // Print guessed classification { currCell.setCellValue(guessedClassifications.get(node)); if (guessedClassifications.get(node).compareTo(actualClassifications.get(node)) == 0) { currCell.setCellStyle(correctCell); correct++; } else { currCell.setCellStyle(incorrectCell); incorrect++; } if (node == testDataLL.size() - 1) // If this is the last loop { double precentRight = (double) correct / (correct + incorrect); currRow = worksheet.createRow(node + 2); currCell = currRow.createCell(attribute); currCell.setCellValue(precentRight); if (precentRight > .90) { correctCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%")); currCell.setCellStyle(correctCell); } else { incorrectCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%")); currCell.setCellStyle(incorrectCell); } } } else if (attribute == metadataLL.size() + 1) // Print potential bad training data if the flag is true { if (unseenDataFlag.get(node)) { currCell.setCellValue( "This node an attribute value not in the training set, classifier selected is based on most frequent classifier. If laplacian smoothing is 1 or more this likely wont happen"); // TODO make this a bit shorter } } } } worksheet = workbook.createSheet("Training Data"); currRow = worksheet.createRow(0); for (int attribute = 0; attribute < metadataLL.size(); attribute++) { Cell currCell = currRow.createCell(attribute); currCell.setCellValue(metadataLL.get(attribute)[0]); currCell.setCellStyle(attributeNameCells); } for (int node = 0; node < trainingDataLL.size(); node++) { currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data int classifierCompleted = 0; // Necessary for if data does not end in classifier for (int attribute = 0; attribute < metadataLL.size(); attribute++) { Cell currCell = currRow.createCell(attribute); if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellValue(knownClassifications.get(node)); classifierCompleted++; } else { currCell.setCellValue(trainingDataLL.get(node)[attribute - classifierCompleted]); } } } worksheet = workbook.createSheet("Likelihood"); currRow = worksheet.createRow(0); int largestAttributeSize = 0; for (int attribute = 0; attribute < classifier.size(); attribute++) { if (classifier.get(attribute).size() > largestAttributeSize) { largestAttributeSize = classifier.get(attribute).size(); } } // Label attributes along the top for (int i = 0; i < metadataLL.size(); i++) { if (i == 0) { Cell currCell = currRow.createCell(i); // currCell.setCellValue("Attributes"); currCell.setCellStyle(attributeNameCells); } else { Cell currCell = currRow.createCell(i); currCell.setCellValue( metadataLL .get(i - 1)[0]); // -1 since the first cell does not contain a attribute name currCell.setCellStyle(attributeNameCells); } } // List possible classifications on the side and classification likelihoods at the end for (int i = 0; i < (largestAttributeSize * (classificationTypes.size() + 1) + classificationTypes.size() + 1); i++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { // +classificationTypes.size() so we can list the classification types likelihood at the end currRow = worksheet.createRow(i + 1); // +1 since first row is attribute names Cell currCell = currRow.createCell(0); int currentClassificationType = i % (classificationTypes.size() + 1); // +1 since the first row of each stride lists each attributes string of // what occurrence the likelihoods are displaying // List the classification type of each row along the side if (i < largestAttributeSize * (classificationTypes.size() + 1)) // +1 since the first row of each stride lists each attributes string of // what occurrence the likelihoods are displaying { for (int j = 0; j < classificationTypes.size() + 1; j++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { if (currentClassificationType == 0) { // Do nothing for now may have it say something later } else if (currentClassificationType == j) { currCell.setCellValue( classificationTypes.get( j - 1)); // -1 since the first cell does not contain a classification type } } } else // List the likelihood of each classification at the end { for (int j = 0; j < classificationTypes.size() + 1; j++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { if (currentClassificationType == 0) { // Do nothing for now may have it say value later } else if (currentClassificationType == j) { currCell.setCellValue( "Likelihood of: " + classificationTypes.get(j - 1) + " is " + classificationLikelihood.get( j - 1)); // -1 since the first cell does not contain a classification type } } } currCell.setCellStyle(attributeNameCells); } // List the data for (int attribute = 0; attribute < classifier.size(); attribute++) { for (int occurrences = 0; occurrences < classifier.get(attribute).size(); occurrences++) { for (int classification = 0; classification < classifier.get(attribute).get(occurrences).length; classification++) { currRow = worksheet.getRow( (occurrences * classifier.get(attribute).get(occurrences).length + classification) + 1); // +1 since first row is attribute names Cell currCell = currRow.createCell( (attribute) + 1); // TODO figure out why this errors out at i:0 j:4 k:0 // largestAttributeSize:105 on kidney dataset currCell.setCellValue(classifier.get(attribute).get(occurrences)[classification]); } } } workbook.write(fileOut); workbook.close(); workbook.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); e.printStackTrace(); System.exit(0); } catch (IOException e) { System.out.println("Unable to output file, is the output destination writelocked?"); e.printStackTrace(); System.exit(0); } }
public static void readExcelFile(String fileName) { FileInputStream file; try { file = new FileInputStream(new File(fileName)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet // Set just in case metadata is incomplete or malformed classificationLocation = sheet1.getRow(0).getPhysicalNumberOfCells() - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells // does not Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet // Loop based on number of attribute names for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) { String[] metadata = new String[METADATASIZE]; // Construct metadata Row currRow = sheet2.getRow(0); // This should be a row of names metadata[0] = currRow.getCell(i).toString(); currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous) metadata[1] = currRow.getCell(i).toString(); currRow = sheet2.getRow(2); // This should say which one is the classifier if (currRow.getCell(i) == null || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) { metadata[2] = "attribute"; } else { metadata[2] = "classifier"; classificationLocation = i; } metadataLL.add(metadata); } for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classificationsLL.add(cell.toString()); // Moved to generate training data so that we do not get possible classifications from // unknown data since some denote unknown by saying ? // //Check to see if we have seen it yet // // int occurrences = 0; // for(int i = 0; i < classificationTypes.size(); i++) // { // if(classificationTypes.get(i).compareTo(cell.toString()) == 0) // { // occurrences++; // } // } // if(occurrences == 0) // { // classificationTypes.add(cell.toString()); // } offset++; } } dataLL.add(data); // classCount = temp.length; } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }