Exemplo n.º 1
0
  public static void generateTrainingDataFromFile(
      String
          fileLocation) // Requires that the original file had the metadata and requires that this
        // file is formated the same in first sheet
      {
    testDataLL = (LinkedList<String[]>) dataLL.clone();
    actualClassifications = (LinkedList<String>) classificationsLL.clone();

    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileLocation));
      Workbook excelFile = new HSSFWorkbook(file);
      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        String classification = "";

        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classification = cell.toString();
            offset++;
          }
        }

        // Even though data and classifications are not really used add it onto the end so it is
        // still complete for in the event they end up being used in a later version
        dataLL.add(data);
        classificationsLL.add(classification);

        trainingDataLL.add(data);
        knownClassifications.add(classification);

        // Check to see if we have seen that classification yet
        int occurrences = 0;
        for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) {
          if (classificationTypes.get(i).compareTo(classification) == 0) {
            occurrences = 1;
          }
        }
        if (occurrences == 0) {
          classificationTypes.add(classification);
        }
      }
      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }
Exemplo n.º 2
0
  public static List<List<List<String>>> readExcel(File file, Rule rule) {
    int start = rule.getStart();
    int end = rule.getEnd();
    List<List<List<String>>> result = Lists.newArrayList();
    Workbook wb;
    try {
      wb = WorkbookFactory.create(file);
    } catch (Exception e) {
      throw new ExcelException(e);
    }
    for (int i = 0; i < wb.getNumberOfSheets(); i++) {
      Sheet sheet = wb.getSheetAt(i);
      List<List<String>> sheetList = Lists.newArrayList();
      int rows = sheet.getLastRowNum();
      if (start <= sheet.getFirstRowNum()) {
        start = sheet.getFirstRowNum();
      }
      if (end >= rows) {
        end = rows;
      } else if (end <= 0) {
        end = rows + end;
      }
      for (int rowIndex = start; rowIndex <= end; rowIndex++) {
        Row row = sheet.getRow(rowIndex);
        List<String> columns = Lists.newArrayList();
        int cellNum = row.getLastCellNum();
        System.out.println(row.getLastCellNum());
        System.out.println(row.getPhysicalNumberOfCells());
        for (int cellIndex = row.getFirstCellNum(); cellIndex < cellNum; cellIndex++) {
          Cell cell = row.getCell(cellIndex);
          int cellType = cell.getCellType();
          String column = "";
          switch (cellType) {
            case Cell.CELL_TYPE_NUMERIC:
              //                            DecimalFormat format = new DecimalFormat();
              //                            format.setGroupingUsed(false);
              column = String.valueOf(cell.getDateCellValue());
              break;
            case Cell.CELL_TYPE_STRING:
              column = cell.getStringCellValue();
              break;
            case Cell.CELL_TYPE_BOOLEAN:
              column = cell.getBooleanCellValue() + "";
              break;
            case Cell.CELL_TYPE_FORMULA:
              column = cell.getCellFormula();
              break;
            case Cell.CELL_TYPE_ERROR:

            case Cell.CELL_TYPE_BLANK:
              column = " ";
              break;
            default:
          }
          columns.add(column.trim());
        }

        List<Boolean> rowFilterFlagList = Lists.newArrayList();
        List<RowFilter> rowFilterList = Lists.newArrayList();
        for (int k = 0; k < rowFilterList.size(); k++) {
          RowFilter rowFilter = rowFilterList.get(k);
          rowFilterFlagList.add(rowFilter.doFilter(rowIndex, columns));
        }
        if (!rowFilterFlagList.contains(false)) {
          sheetList.add(columns);
        }
      }
      result.add(sheetList);
    }
    return result;
  }
Exemplo n.º 3
0
 /**
  * 将excel内的内容读取到xml文件中,并添加dtd验证
  *
  * @param xmlFile
  * @param sheetNum
  * @return 1代表成功,0失败,-1超过最大sheet,2跳过当前失败的xml
  */
 public int excelToXml(String xmlFile, int sheetNum) {
   if (sheetNum >= workBook.getNumberOfSheets()) return -1;
   else sheet = workBook.getSheetAt(sheetNum);
   xmlFile = xmlFile + ".xml";
   try {
     Document document = DocumentHelper.createDocument();
     // 使用sheet名称命名跟节点
     String rootName = sheet.getSheetName().replaceAll(" ", "");
     Element root = document.addElement(rootName);
     // 添加dtd文件说明
     DocumentType documentType = new DOMDocumentType();
     documentType.setElementName(rootName);
     List<ElementDecl> declList = new ArrayList<>();
     declList.add(new ElementDecl(rootName, "(row*)"));
     // 判断sheet是否为空,为空则不执行任何操作
     if (sheet.getRow(0) == null) return 1;
     // 遍历sheet第一行,获取元素名称
     row = sheet.getRow(0);
     String rowString = null;
     List<String> pcdataList = new ArrayList<>();
     for (int y = 0; y < row.getPhysicalNumberOfCells(); y++) {
       Object object = this.getCellValueObject(0, y);
       // 判断是否有合并单元格,有的话跳过
       if (object == null) return 2;
       // 去除表头字符串中的空格
       String objectStr = object.toString().replaceAll(" ", "");
       if (rowString != null) rowString += "|" + objectStr;
       else rowString = objectStr;
       pcdataList.add(objectStr);
     }
     // 设置行节点
     declList.add(new ElementDecl("row", "(" + rowString + ")*"));
     // 遍历list设置行的下级节点
     for (String tmp : pcdataList) {
       declList.add(new ElementDecl(tmp, "(#PCDATA)"));
     }
     documentType.setInternalDeclarations(declList);
     // 遍历读写excel数据到xml中
     for (int x = 1; x < sheet.getLastRowNum(); x++) {
       row = sheet.getRow(x);
       Element rowElement = root.addElement("row");
       for (int y = 0; y < row.getPhysicalNumberOfCells(); y++) {
         // cell = row.getCell(y);
         Object object = this.getCellValueObject(x, y);
         if (object != null) {
           // 将sheet第一行的行首元素当作元素名称
           String pcdataString = pcdataList.get(y);
           Element element = rowElement.addElement(pcdataString);
           // Element element = rowElement.addElement("name");
           element.setText(object.toString());
         }
       }
     }
     // 写入文件和dtd
     document.setDocType(documentType);
     this.docToXmlFile(document, xmlFile);
   } catch (Exception e) {
     e.printStackTrace();
   }
   return 1;
 }
Exemplo n.º 4
0
 private static void confirmEmptyRow(Sheet s, int rowIx) {
   Row row = s.getRow(rowIx);
   assertTrue(row == null || row.getPhysicalNumberOfCells() == 0);
 }
Exemplo n.º 5
0
  public static void readExcelFile(String fileName) {
    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileName));
      Workbook excelFile = new HSSFWorkbook(file);

      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      // Set just in case metadata is incomplete or malformed
      classificationLocation =
          sheet1.getRow(0).getPhysicalNumberOfCells()
              - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells
      // does not

      Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet
      // Loop based on number of attribute names
      for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) {
        String[] metadata = new String[METADATASIZE];

        // Construct metadata
        Row currRow = sheet2.getRow(0); // This should be a row of names
        metadata[0] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous)
        metadata[1] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(2); // This should say which one is the classifier
        if (currRow.getCell(i) == null
            || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) {
          metadata[2] = "attribute";
        } else {
          metadata[2] = "classifier";
          classificationLocation = i;
        }

        metadataLL.add(metadata);
      }

      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classificationsLL.add(cell.toString());

            // Moved to generate training data so that we do not get possible classifications from
            // unknown data since some denote unknown by saying ?

            //						//Check to see if we have seen it yet
            //
            //						int occurrences = 0;
            //						for(int i = 0; i < classificationTypes.size(); i++)
            //						{
            //							if(classificationTypes.get(i).compareTo(cell.toString()) == 0)
            //							{
            //								occurrences++;
            //							}
            //						}
            //						if(occurrences == 0)
            //						{
            //							classificationTypes.add(cell.toString());
            //						}
            offset++;
          }
        }
        dataLL.add(data);
        // classCount = temp.length;
      }

      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }