public static void generateTrainingDataFromFile( String fileLocation) // Requires that the original file had the metadata and requires that this // file is formated the same in first sheet { testDataLL = (LinkedList<String[]>) dataLL.clone(); actualClassifications = (LinkedList<String>) classificationsLL.clone(); FileInputStream file; try { file = new FileInputStream(new File(fileLocation)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; String classification = ""; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classification = cell.toString(); offset++; } } // Even though data and classifications are not really used add it onto the end so it is // still complete for in the event they end up being used in a later version dataLL.add(data); classificationsLL.add(classification); trainingDataLL.add(data); knownClassifications.add(classification); // Check to see if we have seen that classification yet int occurrences = 0; for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) { if (classificationTypes.get(i).compareTo(classification) == 0) { occurrences = 1; } } if (occurrences == 0) { classificationTypes.add(classification); } } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }
public static List<List<List<String>>> readExcel(File file, Rule rule) { int start = rule.getStart(); int end = rule.getEnd(); List<List<List<String>>> result = Lists.newArrayList(); Workbook wb; try { wb = WorkbookFactory.create(file); } catch (Exception e) { throw new ExcelException(e); } for (int i = 0; i < wb.getNumberOfSheets(); i++) { Sheet sheet = wb.getSheetAt(i); List<List<String>> sheetList = Lists.newArrayList(); int rows = sheet.getLastRowNum(); if (start <= sheet.getFirstRowNum()) { start = sheet.getFirstRowNum(); } if (end >= rows) { end = rows; } else if (end <= 0) { end = rows + end; } for (int rowIndex = start; rowIndex <= end; rowIndex++) { Row row = sheet.getRow(rowIndex); List<String> columns = Lists.newArrayList(); int cellNum = row.getLastCellNum(); System.out.println(row.getLastCellNum()); System.out.println(row.getPhysicalNumberOfCells()); for (int cellIndex = row.getFirstCellNum(); cellIndex < cellNum; cellIndex++) { Cell cell = row.getCell(cellIndex); int cellType = cell.getCellType(); String column = ""; switch (cellType) { case Cell.CELL_TYPE_NUMERIC: // DecimalFormat format = new DecimalFormat(); // format.setGroupingUsed(false); column = String.valueOf(cell.getDateCellValue()); break; case Cell.CELL_TYPE_STRING: column = cell.getStringCellValue(); break; case Cell.CELL_TYPE_BOOLEAN: column = cell.getBooleanCellValue() + ""; break; case Cell.CELL_TYPE_FORMULA: column = cell.getCellFormula(); break; case Cell.CELL_TYPE_ERROR: case Cell.CELL_TYPE_BLANK: column = " "; break; default: } columns.add(column.trim()); } List<Boolean> rowFilterFlagList = Lists.newArrayList(); List<RowFilter> rowFilterList = Lists.newArrayList(); for (int k = 0; k < rowFilterList.size(); k++) { RowFilter rowFilter = rowFilterList.get(k); rowFilterFlagList.add(rowFilter.doFilter(rowIndex, columns)); } if (!rowFilterFlagList.contains(false)) { sheetList.add(columns); } } result.add(sheetList); } return result; }
/**
 * Reads the content of one Excel sheet into an XML file and adds an internal DTD for validation.
 *
 * <p>The root element is named after the sheet (spaces removed). The cells of the first row,
 * with spaces removed, become the element names used inside each {@code row} element; every
 * following row of the sheet becomes one {@code row} element.
 *
 * @param xmlFile path of the target file without extension; {@code ".xml"} is appended
 * @param sheetNum index of the sheet to export
 * @return 1 on success (including an empty sheet, for which nothing is written), 0 on failure,
 *         -1 if {@code sheetNum} exceeds the number of sheets, 2 if the sheet is skipped because
 *         a header cell could not be read
 */
public int excelToXml(String xmlFile, int sheetNum) {
    if (sheetNum >= workBook.getNumberOfSheets()) {
        return -1;
    }
    sheet = workBook.getSheetAt(sheetNum);
    xmlFile = xmlFile + ".xml";
    try {
        Document document = DocumentHelper.createDocument();
        // Name the root element after the sheet.
        String rootName = sheet.getSheetName().replaceAll(" ", "");
        Element root = document.addElement(rootName);
        // Describe the document structure in a DTD.
        DocumentType documentType = new DOMDocumentType();
        documentType.setElementName(rootName);
        List<ElementDecl> declList = new ArrayList<>();
        declList.add(new ElementDecl(rootName, "(row*)"));

        // An empty sheet needs no further work.
        if (sheet.getRow(0) == null) {
            return 1;
        }

        // Walk the first row of the sheet to collect the element names.
        row = sheet.getRow(0);
        String rowString = null;
        List<String> pcdataList = new ArrayList<>();
        for (int y = 0; y < row.getPhysicalNumberOfCells(); y++) {
            Object object = this.getCellValueObject(0, y);
            // A null value here is taken to mean a merged cell; skip the whole sheet.
            if (object == null) {
                return 2;
            }
            // Remove the spaces from the header text.
            String objectStr = object.toString().replaceAll(" ", "");
            if (rowString != null) {
                rowString += "|" + objectStr;
            } else {
                rowString = objectStr;
            }
            pcdataList.add(objectStr);
        }

        // Declare the row element, then one child declaration per header name.
        declList.add(new ElementDecl("row", "(" + rowString + ")*"));
        for (String tmp : pcdataList) {
            declList.add(new ElementDecl(tmp, "(#PCDATA)"));
        }
        documentType.setInternalDeclarations(declList);

        // Copy the data rows into the document. getLastRowNum() is the index of the last
        // row, so the bound is inclusive; an exclusive bound would drop the final row.
        for (int x = 1; x <= sheet.getLastRowNum(); x++) {
            row = sheet.getRow(x);
            if (row == null) {
                // No row exists at this index.
                continue;
            }
            Element rowElement = root.addElement("row");
            // Cells beyond the header width have no element name and are ignored.
            int columnCount = Math.min(row.getPhysicalNumberOfCells(), pcdataList.size());
            for (int y = 0; y < columnCount; y++) {
                Object object = this.getCellValueObject(x, y);
                if (object != null) {
                    // The header cell of this column is the element name.
                    String pcdataString = pcdataList.get(y);
                    Element element = rowElement.addElement(pcdataString);
                    element.setText(object.toString());
                }
            }
        }

        // Attach the DTD and write the file.
        document.setDocType(documentType);
        this.docToXmlFile(document, xmlFile);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure instead of falling through to the success code.
        return 0;
    }
    return 1;
}
/**
 * Asserts that the row at the given index is empty, i.e. it either does not exist in the sheet
 * or contains no physical cells.
 *
 * @param s the sheet to inspect
 * @param rowIx index of the row expected to be empty
 */
private static void confirmEmptyRow(Sheet s, int rowIx) {
    final Row target = s.getRow(rowIx);
    // A missing row counts as having zero cells.
    final int cellCount = (target == null) ? 0 : target.getPhysicalNumberOfCells();
    assertTrue(cellCount == 0);
}
public static void readExcelFile(String fileName) { FileInputStream file; try { file = new FileInputStream(new File(fileName)); Workbook excelFile = new HSSFWorkbook(file); Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet // Set just in case metadata is incomplete or malformed classificationLocation = sheet1.getRow(0).getPhysicalNumberOfCells() - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells // does not Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet // Loop based on number of attribute names for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) { String[] metadata = new String[METADATASIZE]; // Construct metadata Row currRow = sheet2.getRow(0); // This should be a row of names metadata[0] = currRow.getCell(i).toString(); currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous) metadata[1] = currRow.getCell(i).toString(); currRow = sheet2.getRow(2); // This should say which one is the classifier if (currRow.getCell(i) == null || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) { metadata[2] = "attribute"; } else { metadata[2] = "classifier"; classificationLocation = i; } metadataLL.add(metadata); } for (Row row : sheet1) { String data[] = new String[row.getPhysicalNumberOfCells() - 1]; int offset = 0; // Used so that we can declare an array of the size of the attributes without the // classification for (Cell cell : row) { int index = cell.getColumnIndex(); if (classificationLocation != index) { data[index - offset] = cell.toString(); } else { classificationsLL.add(cell.toString()); // Moved to generate training data so that we do not get possible classifications from // unknown data since some denote unknown by saying ? 
// //Check to see if we have seen it yet // // int occurrences = 0; // for(int i = 0; i < classificationTypes.size(); i++) // { // if(classificationTypes.get(i).compareTo(cell.toString()) == 0) // { // occurrences++; // } // } // if(occurrences == 0) // { // classificationTypes.add(cell.toString()); // } offset++; } } dataLL.add(data); // classCount = temp.length; } excelFile.close(); } catch (FileNotFoundException e) { System.out.println("Error file not found"); System.exit(0); } catch (IOException e) { System.out.println("Unable to read file, disk drive may be failing"); e.printStackTrace(); System.exit(0); } }