Esempio n. 1
0
  public static void generateTrainingDataFromFile(
      String
          fileLocation) // Requires that the original file had the metadata and requires that this
        // file is formated the same in first sheet
      {
    testDataLL = (LinkedList<String[]>) dataLL.clone();
    actualClassifications = (LinkedList<String>) classificationsLL.clone();

    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileLocation));
      Workbook excelFile = new HSSFWorkbook(file);
      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        String classification = "";

        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classification = cell.toString();
            offset++;
          }
        }

        // Even though data and classifications are not really used add it onto the end so it is
        // still complete for in the event they end up being used in a later version
        dataLL.add(data);
        classificationsLL.add(classification);

        trainingDataLL.add(data);
        knownClassifications.add(classification);

        // Check to see if we have seen that classification yet
        int occurrences = 0;
        for (int i = 0; i < classificationTypes.size() && occurrences == 0; i++) {
          if (classificationTypes.get(i).compareTo(classification) == 0) {
            occurrences = 1;
          }
        }
        if (occurrences == 0) {
          classificationTypes.add(classification);
        }
      }
      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }
Esempio n. 2
0
  public static void writeExcelFile(String fileLocation) {
    try {
      FileOutputStream fileOut = new FileOutputStream(fileLocation);

      HSSFWorkbook workbook = new HSSFWorkbook();

      Font bold = workbook.createFont(); // Create font
      bold.setBoldweight(Font.BOLDWEIGHT_BOLD); // Make font bold

      CellStyle correctCell = workbook.createCellStyle();
      correctCell.setFillForegroundColor(HSSFColor.GREEN.index);
      correctCell.setFillBackgroundColor(HSSFColor.GREEN.index);
      correctCell.setFillPattern(CellStyle.SOLID_FOREGROUND);

      CellStyle incorrectCell = workbook.createCellStyle();
      incorrectCell.setFillForegroundColor(HSSFColor.RED.index);
      incorrectCell.setFillBackgroundColor(HSSFColor.RED.index);
      incorrectCell.setFillPattern(CellStyle.SOLID_FOREGROUND);

      CellStyle classificationCells = workbook.createCellStyle();
      classificationCells.setFillForegroundColor(HSSFColor.YELLOW.index);
      classificationCells.setFillBackgroundColor(HSSFColor.YELLOW.index);
      classificationCells.setFillPattern(CellStyle.SOLID_FOREGROUND);

      CellStyle attributeNameCells = workbook.createCellStyle();
      attributeNameCells.setFont(bold);

      CellStyle classificationAttributeCell = workbook.createCellStyle();
      classificationAttributeCell.setFillForegroundColor(HSSFColor.YELLOW.index);
      classificationAttributeCell.setFillBackgroundColor(HSSFColor.YELLOW.index);
      classificationAttributeCell.setFillPattern(CellStyle.SOLID_FOREGROUND);
      classificationAttributeCell.setFont(bold);

      Sheet worksheet = workbook.createSheet("Results");
      Row currRow = worksheet.createRow(0);
      for (int attribute = 0; attribute < metadataLL.size() + 1; attribute++) {
        Cell currCell = currRow.createCell(attribute);
        if (attribute < metadataLL.size()) {
          currCell.setCellValue(metadataLL.get(attribute)[0]);
          if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) {
            currCell.setCellStyle(classificationAttributeCell);
          } else {
            currCell.setCellStyle(attributeNameCells);
          }
        } else {
          currCell.setCellValue("Guessed Classification");
          currCell.setCellStyle(attributeNameCells);
        }
      }

      int correct = 0;
      int incorrect = 0;
      for (int node = 0; node < testDataLL.size(); node++) {
        currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data
        int classifierCompleted = 0; // Necessary for if data does not end in classifier
        for (int attribute = 0;
            attribute < metadataLL.size() + 2;
            attribute++) // +1 for the row for guessed data +1 for the row that contains
        {
          Cell currCell = currRow.createCell(attribute);

          if (attribute < metadataLL.size()) // Print testingData
          {
            if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) {
              currCell.setCellValue(actualClassifications.get(node));
              currCell.setCellStyle(classificationCells);
              classifierCompleted++;
            } else {
              currCell.setCellValue(testDataLL.get(node)[attribute - classifierCompleted]);
            }
          } else if (attribute == metadataLL.size()) // Print guessed classification
          {
            currCell.setCellValue(guessedClassifications.get(node));
            if (guessedClassifications.get(node).compareTo(actualClassifications.get(node)) == 0) {
              currCell.setCellStyle(correctCell);
              correct++;
            } else {
              currCell.setCellStyle(incorrectCell);
              incorrect++;
            }

            if (node == testDataLL.size() - 1) // If this is the last loop
            {
              double precentRight = (double) correct / (correct + incorrect);

              currRow = worksheet.createRow(node + 2);
              currCell = currRow.createCell(attribute);

              currCell.setCellValue(precentRight);
              if (precentRight > .90) {
                correctCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%"));
                currCell.setCellStyle(correctCell);
              } else {
                incorrectCell.setDataFormat(workbook.createDataFormat().getFormat("0.000%"));
                currCell.setCellStyle(incorrectCell);
              }
            }
          } else if (attribute
              == metadataLL.size() + 1) // Print potential bad training data if the flag is true
          {
            if (unseenDataFlag.get(node)) {
              currCell.setCellValue(
                  "This node an attribute value not in the training set, classifier selected is based on most frequent classifier. If laplacian smoothing is 1 or more this likely wont happen"); // TODO make this a bit shorter
            }
          }
        }
      }

      worksheet = workbook.createSheet("Training Data");
      currRow = worksheet.createRow(0);
      for (int attribute = 0; attribute < metadataLL.size(); attribute++) {
        Cell currCell = currRow.createCell(attribute);
        currCell.setCellValue(metadataLL.get(attribute)[0]);
        currCell.setCellStyle(attributeNameCells);
      }

      for (int node = 0; node < trainingDataLL.size(); node++) {
        currRow = worksheet.createRow(node + 1); // Offset by one since first row is header data
        int classifierCompleted = 0; // Necessary for if data does not end in classifier
        for (int attribute = 0; attribute < metadataLL.size(); attribute++) {
          Cell currCell = currRow.createCell(attribute);

          if (metadataLL.get(attribute)[2].compareTo("classifier") == 0) {
            currCell.setCellValue(knownClassifications.get(node));
            classifierCompleted++;
          } else {
            currCell.setCellValue(trainingDataLL.get(node)[attribute - classifierCompleted]);
          }
        }
      }

      worksheet = workbook.createSheet("Likelihood");
      currRow = worksheet.createRow(0);

      int largestAttributeSize = 0;

      for (int attribute = 0; attribute < classifier.size(); attribute++) {
        if (classifier.get(attribute).size() > largestAttributeSize) {
          largestAttributeSize = classifier.get(attribute).size();
        }
      }

      // Label attributes along the top
      for (int i = 0; i < metadataLL.size(); i++) {
        if (i == 0) {
          Cell currCell = currRow.createCell(i);
          // currCell.setCellValue("Attributes");
          currCell.setCellStyle(attributeNameCells);
        } else {
          Cell currCell = currRow.createCell(i);
          currCell.setCellValue(
              metadataLL
                  .get(i - 1)[0]); // -1 since the first cell does not contain a attribute name
          currCell.setCellStyle(attributeNameCells);
        }
      }

      // List possible classifications on the side and classification likelihoods at the end
      for (int i = 0;
          i
              < (largestAttributeSize * (classificationTypes.size() + 1)
                  + classificationTypes.size()
                  + 1);
          i++) // +1 since the first row of each stride lists each attributes string of what
      // occurrence the likelihoods are displaying
      { // +classificationTypes.size() so we can list the classification types likelihood at the end
        currRow = worksheet.createRow(i + 1); // +1 since first row is attribute names
        Cell currCell = currRow.createCell(0);

        int currentClassificationType =
            i
                % (classificationTypes.size()
                    + 1); // +1 since the first row of each stride lists each attributes string of
        // what occurrence the likelihoods are displaying

        // List the classification type of each row along the side
        if (i
            < largestAttributeSize
                * (classificationTypes.size()
                    + 1)) // +1 since the first row of each stride lists each attributes string of
        // what occurrence the likelihoods are displaying
        {
          for (int j = 0;
              j < classificationTypes.size() + 1;
              j++) // +1 since the first row of each stride lists each attributes string of what
          // occurrence the likelihoods are displaying
          {
            if (currentClassificationType == 0) {
              // Do nothing for now may have it say something later
            } else if (currentClassificationType == j) {
              currCell.setCellValue(
                  classificationTypes.get(
                      j - 1)); // -1 since the first cell does not contain a classification type
            }
          }
        } else // List the likelihood of each classification at the end
        {

          for (int j = 0;
              j < classificationTypes.size() + 1;
              j++) // +1 since the first row of each stride lists each attributes string of what
          // occurrence the likelihoods are displaying
          {
            if (currentClassificationType == 0) {
              // Do nothing for now may have it say value later
            } else if (currentClassificationType == j) {
              currCell.setCellValue(
                  "Likelihood of: "
                      + classificationTypes.get(j - 1)
                      + " is "
                      + classificationLikelihood.get(
                          j - 1)); // -1 since the first cell does not contain a classification type
            }
          }
        }
        currCell.setCellStyle(attributeNameCells);
      }

      // List the data
      for (int attribute = 0; attribute < classifier.size(); attribute++) {
        for (int occurrences = 0; occurrences < classifier.get(attribute).size(); occurrences++) {
          for (int classification = 0;
              classification < classifier.get(attribute).get(occurrences).length;
              classification++) {
            currRow =
                worksheet.getRow(
                    (occurrences * classifier.get(attribute).get(occurrences).length
                            + classification)
                        + 1); // +1 since first row is attribute names
            Cell currCell =
                currRow.createCell(
                    (attribute) + 1); // TODO figure out why this errors out at i:0 j:4 k:0
            // largestAttributeSize:105 on kidney dataset
            currCell.setCellValue(classifier.get(attribute).get(occurrences)[classification]);
          }
        }
      }

      workbook.write(fileOut);
      workbook.close();
      workbook.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      e.printStackTrace();
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to output file, is the output destination writelocked?");
      e.printStackTrace();
      System.exit(0);
    }
  }
Esempio n. 3
0
  public static void readExcelFile(String fileName) {
    FileInputStream file;
    try {
      file = new FileInputStream(new File(fileName));
      Workbook excelFile = new HSSFWorkbook(file);

      Sheet sheet1 = excelFile.getSheetAt(0); // Data sheet
      // Set just in case metadata is incomplete or malformed
      classificationLocation =
          sheet1.getRow(0).getPhysicalNumberOfCells()
              - 1; // Minus one since classificationLocation includes 0 and getPhysicalNumberOfCells
      // does not

      Sheet sheet2 = excelFile.getSheetAt(1); // Metadata sheet
      // Loop based on number of attribute names
      for (int i = 0; i < sheet2.getRow(0).getPhysicalNumberOfCells(); i++) {
        String[] metadata = new String[METADATASIZE];

        // Construct metadata
        Row currRow = sheet2.getRow(0); // This should be a row of names
        metadata[0] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(1); // This should be a row of data types (discrete or continuous)
        metadata[1] = currRow.getCell(i).toString();
        currRow = sheet2.getRow(2); // This should say which one is the classifier
        if (currRow.getCell(i) == null
            || currRow.getCell(i).getCellType() == Cell.CELL_TYPE_BLANK) {
          metadata[2] = "attribute";
        } else {
          metadata[2] = "classifier";
          classificationLocation = i;
        }

        metadataLL.add(metadata);
      }

      for (Row row : sheet1) {
        String data[] = new String[row.getPhysicalNumberOfCells() - 1];
        int offset =
            0; // Used so that we can declare an array of the size of the attributes without the
        // classification
        for (Cell cell : row) {
          int index = cell.getColumnIndex();
          if (classificationLocation != index) {
            data[index - offset] = cell.toString();
          } else {
            classificationsLL.add(cell.toString());

            // Moved to generate training data so that we do not get possible classifications from
            // unknown data since some denote unknown by saying ?

            //						//Check to see if we have seen it yet
            //
            //						int occurrences = 0;
            //						for(int i = 0; i < classificationTypes.size(); i++)
            //						{
            //							if(classificationTypes.get(i).compareTo(cell.toString()) == 0)
            //							{
            //								occurrences++;
            //							}
            //						}
            //						if(occurrences == 0)
            //						{
            //							classificationTypes.add(cell.toString());
            //						}
            offset++;
          }
        }
        dataLL.add(data);
        // classCount = temp.length;
      }

      excelFile.close();
    } catch (FileNotFoundException e) {
      System.out.println("Error file not found");
      System.exit(0);
    } catch (IOException e) {
      System.out.println("Unable to read file, disk drive may be failing");
      e.printStackTrace();
      System.exit(0);
    }
  }