Beispiel #1
0
  public static void generateTrainingDataFromFile(
      String
          fileLocation) // Requires that the original file had the metadata and requires that this
        // file is formated the same in first sheet
      {
    testDataLL = (LinkedList<String[]>) dataLL.clone();
    actualClassifications = (LinkedList<String>) classificationsLL.clone();

    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileLocation));
      Workbook excelFile = new HSSFWorkbook(file);
      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        String classification = "";

        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classification = cell.toString();
            offset++;
          }
        }

        // Even though data and classifications are not really used add it onto the end so it is
        // still complete for in the event they end up being used in a later version
        dataLL.add(data);
        classificationsLL.add(classification);

        trainingDataLL.add(data);
        knownClassifications.add(classification);

        // Check to see if we have seen that classification yet
        int occurrences = 0;
        for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) {
          if (classificationTypes.get(i).compareTo(classification) == 0) {
            occurrences = 1;
          }
        }
        if (occurrences == 0) {
          classificationTypes.add(classification);
        }
      }
      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }
Beispiel #2
0
  public static void readExcelFile(String fileName) {
    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileName));
      Workbook excelFile = new HSSFWorkbook(file);

      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      // Set just in case metadata is incomplete or malformed
      classificationLocation =
          sheet1.getRow(0).getPhysicalNumberOfCells()
              - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells
      // does not

      Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet
      // Loop based on number of attribute names
      for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) {
        String[] metadata = new String[METADATASIZE];

        // Construct metadata
        Row currRow = sheet2.getRow(0); // This should be a row of names
        metadata[0] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous)
        metadata[1] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(2); // This should say which one is the classifier
        if (currRow.getCell(i) == null
            || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) {
          metadata[2] = "attribute";
        } else {
          metadata[2] = "classifier";
          classificationLocation = i;
        }

        metadataLL.add(metadata);
      }

      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classificationsLL.add(cell.toString());

            // Moved to generate training data so that we do not get possible classifications from
            // unknown data since some denote unknown by saying ?

            //						//Check to see if we have seen it yet
            //
            //						int occurrences = 0;
            //						for(int i = 0; i < classificationTypes.size(); i++)
            //						{
            //							if(classificationTypes.get(i).compareTo(cell.toString()) == 0)
            //							{
            //								occurrences++;
            //							}
            //						}
            //						if(occurrences == 0)
            //						{
            //							classificationTypes.add(cell.toString());
            //						}
            offset++;
          }
        }
        dataLL.add(data);
        // classCount = temp.length;
      }

      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }