/** * 将document写入xml文件中 * * @param document * @param xmlFile * @return */ public boolean docToXmlFile(Document document, String xmlFile) { // 删除生成文件名中的中文字符 xmlFile = xmlFile.replaceAll("([\u4E00-\u9FA5]+)|([\u4E00-\u9FA5]+)", ""); try { // 排版缩进的格式 OutputFormat format = OutputFormat.createPrettyPrint(); // 设置编码,可按需设置 format.setEncoding("gb2312"); // 创建XMLWriter对象,指定了写出文件及编码格式 XMLWriter writer = new XMLWriter( new OutputStreamWriter(new FileOutputStream(new File(xmlFile)), "gb2312"), format); // 写入 writer.write(document); // 立即写入 writer.flush(); // 关闭操作 writer.close(); System.out.println("输出xml文件到:" + xmlFile); return true; } catch (IOException e) { e.printStackTrace(); return false; } }
public static void generateTrainingDataFromFile( String fileLocation) // Requires that the original file had the metadata and requires that this // file is formated the same in first sheet { testDataLL = (LinkedList<String[]>) dataLL.clone(); actualClassifications = (LinkedList<String>) classificationsLL.clone(); FileInputStream file; try { file = new FileInputStream(new File(fileLocation)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; String classification = ""; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classification = cell.toString(); offset++; } } // Even though data and classifications are not really used add it onto the end so it is // still complete for in the event they end up being used in a later version dataLL.add(data); classificationsLL.add(classification); trainingDataLL.add(data); knownClassifications.add(classification); // Check to see if we have seen that classification yet int occurrences = 0; for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) { if (classificationTypes.get(i).compareTo(classification) == 0) { occurrences = 1; } } if (occurrences == 0) { classificationTypes.add(classification); } } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }
/** * xlsxファイルを読み込み<b>XSSFWorkbook</b>を生成する * * @param filePath ファイルパス * @return <b>XSSFWorkbook</b> */ public static XSSFWorkbook load(String filePath) { try { InputStream inputStream = new FileInputStream(filePath); XSSFWorkbook workbook = new XSSFWorkbook(inputStream); return workbook; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; }
/** * 导出对象到Excel,不是基于模板的,直接新建一个Excel完成导出,基于流 * * @param os 输出流 * @param objs 对象列表 * @param clz 对象类型 * @param isXssf 是否是2007版本 */ public void exportExcelByPath( OutputStream os, List objs, Class clz, boolean isXssf, String message) { try { Workbook wb = handleExcel(objs, clz, isXssf, message); wb.write(os); } catch (FileNotFoundException e) { e.printStackTrace(); logger.error(e); } catch (IOException e) { e.printStackTrace(); logger.error(e); } }
/** * 从文件路径读取相应的Excel文件到对象列表 * * @param path 文件路径下的path * @param clz 对象类型 * @param readLine 开始行,注意是标题所在行 * @param tailLine 底部有多少行,在读入对象时,会减去这些行 * @return */ public List<Object> readExcelByPath(String path, Class clz, int readLine, int tailLine) { Workbook wb = null; try { wb = WorkbookFactory.create(new File(path)); return readExcel(wb, clz, readLine, tailLine); } catch (FileNotFoundException e) { e.printStackTrace(); logger.error(e); } catch (InvalidFormatException e) { e.printStackTrace(); logger.error(e); } catch (IOException e) { e.printStackTrace(); logger.error(e); } return null; }
@Test public void test_writeFile() { String path = "D:\\test.xlsx"; // String path = "D:\\test.xls"; // String path = "D:/"; // String path = "D:\\"; long startTime = System.currentTimeMillis(); try { ExcelUtil.writeFile(path); } catch (IOException e) { e.printStackTrace(); } System.out.println("cost :" + (System.currentTimeMillis() - startTime)); }
/** * 导出对象到Excel,直接新建一个Excel完成导出,基于路径的导出,不基于模板 * * @param outPath 导出路径 * @param objs 对象列表 * @param clz 对象类型 * @param isXssf 是否是2007版本 */ public void exportExcelByPath( String outPath, List objs, Class clz, boolean isXssf, String message) { Workbook wb = handleExcel(objs, clz, isXssf, message); FileOutputStream fos = null; try { fos = new FileOutputStream(outPath); wb.toString().getBytes("GB2312"); wb.write(fos); } catch (FileNotFoundException e) { e.printStackTrace(); logger.error(e); } catch (IOException e) { e.printStackTrace(); logger.error(e); } finally { try { if (fos != null) fos.close(); } catch (IOException e) { e.printStackTrace(); logger.error(e); } } }
public static void writeExcelFile(String fileLocation) { try { FileOutputStream fileOut = new FileOutputStream(fileLocation); HSSFWorkbook workbook = new HSSFWorkbook(); Font bold = workbook.createFont(); // Create font bold.setBoldweight(Font.BOLDWEIGHT_BOLD); // Make font bold CellStyle correctCell = workbook.createCellStyle(); correctCell.setFillForegroundColor(HSSFColor.GREEN.index); correctCell.setFillBackgroundColor(HSSFColor.GREEN.index); correctCell.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle incorrectCell = workbook.createCellStyle(); incorrectCell.setFillForegroundColor(HSSFColor.RED.index); incorrectCell.setFillBackgroundColor(HSSFColor.RED.index); incorrectCell.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle classificationCells = workbook.createCellStyle(); classificationCells.setFillForegroundColor(HSSFColor.YELLOW.index); classificationCells.setFillBackgroundColor(HSSFColor.YELLOW.index); classificationCells.setFillPattern(CellStyle.SOLID_FOREGROUND); CellStyle attributeNameCells = workbook.createCellStyle(); attributeNameCells.setFont(bold); CellStyle classificationAttributeCell = workbook.createCellStyle(); classificationAttributeCell.setFillForegroundColor(HSSFColor.YELLOW.index); classificationAttributeCell.setFillBackgroundColor(HSSFColor.YELLOW.index); classificationAttributeCell.setFillPattern(CellStyle.SOLID_FOREGROUND); classificationAttributeCell.setFont(bold); Sheet worksheet = workbook.createSheet("Results"); Row currRow = worksheet.createRow(0); for (int attribute = 0; attribute < metadataLL.size() + 1; attribute++) { Cell currCell = currRow.createCell(attribute); if (attribute < metadataLL.size()) { currCell.setCellValue(metadataLL.get(attribute)[0]); if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellStyle(classificationAttributeCell); } else { currCell.setCellStyle(attributeNameCells); } } else { currCell.setCellValue("Guessed Classification"); currCell.setCellStyle(attributeNameCells); } } int correct = 0; int incorrect = 0; for (int node = 0; node < testDataLL.size(); node++) { currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data int classifierCompleted = 0; // Necessary for if data does not end in classifier for (int attribute = 0; attribute < metadataLL.size() + 2; attribute++) // +1 for the row for guessed data +1 for the row that contains { Cell currCell = currRow.createCell(attribute); if (attribute < metadataLL.size()) // Print testingData { if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellValue(actualClassifications.get(node)); currCell.setCellStyle(classificationCells); classifierCompleted++; } else { currCell.setCellValue(testDataLL.get(node)[attribute - classifierCompleted]); } } else if (attribute == metadataLL.size()) // Print guessed classification { currCell.setCellValue(guessedClassifications.get(node)); if (guessedClassifications.get(node).compareTo(actualClassifications.get(node)) == 0) { currCell.setCellStyle(correctCell); correct++; } else { currCell.setCellStyle(incorrectCell); incorrect++; } if (node == testDataLL.size() - 1) // If this is the last loop { double precentRight = (double) correct / (correct + incorrect); currRow = worksheet.createRow(node + 2); currCell = currRow.createCell(attribute); currCell.setCellValue(precentRight); if (precentRight > .90) { correctCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%")); currCell.setCellStyle(correctCell); } else { incorrectCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%")); currCell.setCellStyle(incorrectCell); } } } else if (attribute == metadataLL.size() + 1) // Print potential bad training data if the flag is true { if (unseenDataFlag.get(node)) { currCell.setCellValue( "This node an attribute value not in the training set, classifier selected is based on most frequent classifier. If laplacian smoothing is 1 or more this likely wont happen"); // TODO make this a bit shorter } } } } worksheet = workbook.createSheet("Training Data"); currRow = worksheet.createRow(0); for (int attribute = 0; attribute < metadataLL.size(); attribute++) { Cell currCell = currRow.createCell(attribute); currCell.setCellValue(metadataLL.get(attribute)[0]); currCell.setCellStyle(attributeNameCells); } for (int node = 0; node < trainingDataLL.size(); node++) { currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data int classifierCompleted = 0; // Necessary for if data does not end in classifier for (int attribute = 0; attribute < metadataLL.size(); attribute++) { Cell currCell = currRow.createCell(attribute); if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) { currCell.setCellValue(knownClassifications.get(node)); classifierCompleted++; } else { currCell.setCellValue(trainingDataLL.get(node)[attribute - classifierCompleted]); } } } worksheet = workbook.createSheet("Likelihood"); currRow = worksheet.createRow(0); int largestAttributeSize = 0; for (int attribute = 0; attribute < classifier.size(); attribute++) { if (classifier.get(attribute).size() > largestAttributeSize) { largestAttributeSize = classifier.get(attribute).size(); } } // Label attributes along the top for (int i = 0; i < metadataLL.size(); i++) { if (i == 0) { Cell currCell = currRow.createCell(i); // currCell.setCellValue("Attributes"); currCell.setCellStyle(attributeNameCells); } else { Cell currCell = currRow.createCell(i); currCell.setCellValue( metadataLL .get(i - 1)[0]); // -1 since the first cell does not contain a attribute name currCell.setCellStyle(attributeNameCells); } } // List possible classifications on the side and classification likelihoods at the end for (int i = 0; i < (largestAttributeSize * (classificationTypes.size() + 1) + classificationTypes.size() + 1); i++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { // +classificationTypes.size() so we can list the classification types likelihood at the end currRow = worksheet.createRow(i + 1); // +1 since first row is attribute names Cell currCell = currRow.createCell(0); int currentClassificationType = i % (classificationTypes.size() + 1); // +1 since the first row of each stride lists each attributes string of // what occurrence the likelihoods are displaying // List the classification type of each row along the side if (i < largestAttributeSize * (classificationTypes.size() + 1)) // +1 since the first row of each stride lists each attributes string of // what occurrence the likelihoods are displaying { for (int j = 0; j < classificationTypes.size() + 1; j++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { if (currentClassificationType == 0) { // Do nothing for now may have it say something later } else if (currentClassificationType == j) { currCell.setCellValue( classificationTypes.get( j - 1)); // -1 since the first cell does not contain a classification type } } } else // List the likelihood of each classification at the end { for (int j = 0; j < classificationTypes.size() + 1; j++) // +1 since the first row of each stride lists each attributes string of what // occurrence the likelihoods are displaying { if (currentClassificationType == 0) { // Do nothing for now may have it say value later } else if (currentClassificationType == j) { currCell.setCellValue( "Likelihood of: " + classificationTypes.get(j - 1) + " is " + classificationLikelihood.get( j - 1)); // -1 since the first cell does not contain a classification type } } } currCell.setCellStyle(attributeNameCells); } // List the data for (int attribute = 0; attribute < classifier.size(); attribute++) { for (int occurrences = 0; occurrences < classifier.get(attribute).size(); occurrences++) { for (int classification = 0; classification < classifier.get(attribute).get(occurrences).length; classification++) { currRow = worksheet.getRow( (occurrences * classifier.get(attribute).get(occurrences).length + classification) + 1); // +1 since first row is attribute names Cell currCell = currRow.createCell( (attribute) + 1); // TODO figure out why this errors out at i:0 j:4 k:0 // largestAttributeSize:105 on kidney dataset currCell.setCellValue(classifier.get(attribute).get(occurrences)[classification]); } } } workbook.write(fileOut); workbook.close(); workbook.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); e.printStackTrace(); System.exit(0); } catch (IOException e) { System.out.println("Unable to output file, is the output destination writelocked?"); e.printStackTrace(); System.exit(0); } }
public static void readExcelFile(String fileName) { FileInputStream file; try { file = new FileInputStream(new File(fileName)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet // Set just in case metadata is incomplete or malformed classificationLocation = sheet1.getRow(0).getPhysicalNumberOfCells() - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells // does not Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet // Loop based on number of attribute names for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) { String[] metadata = new String[METADATASIZE]; // Construct metadata Row currRow = sheet2.getRow(0); // This should be a row of names metadata[0] = currRow.getCell(i).toString(); currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous) metadata[1] = currRow.getCell(i).toString(); currRow = sheet2.getRow(2); // This should say which one is the classifier if (currRow.getCell(i) == null || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) { metadata[2] = "attribute"; } else { metadata[2] = "classifier"; classificationLocation = i; } metadataLL.add(metadata); } for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classificationsLL.add(cell.toString()); // Moved to generate training data so that we do not get possible classifications from // unknown data since some denote unknown by saying ? // //Check to see if we have seen it yet // // int occurrences = 0; // for(int i = 0; i < classificationTypes.size(); i++) // { // if(classificationTypes.get(i).compareTo(cell.toString()) == 0) // { // occurrences++; // } // } // if(occurrences == 0) // { // classificationTypes.add(cell.toString()); // } offset++; } } dataLL.add(data); // classCount = temp.length; } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }