Beispiel #1
0
  /**
   * Groups the clusters of every line in {@code data} by their position within the line and
   * stores the result in {@code dataByColumn}.
   *
   * <p>The n-th cluster of each line (zero-based) is appended to the list stored under key
   * {@code n}, so every map entry holds the clusters that share the same index, in the order the
   * lines appear in {@code data}. A line with fewer clusters than others contributes nothing to
   * the higher keys.
   *
   * <p>The resulting map is also printed to standard output.
   */
  private void findColumns() {
    Map<Integer, ArrayList<ArrayList<Element>>> columnMap =
        new HashMap<Integer, ArrayList<ArrayList<Element>>>();

    for (Line line : data) {
      // The column index restarts at 0 for every line.
      int columnIndex = 0;
      for (ArrayList<Element> cluster : line.getClusters()) {
        ArrayList<ArrayList<Element>> column = columnMap.get(columnIndex);
        if (column == null) {
          // First cluster seen at this index: create the bucket once and register it.
          column = new ArrayList<ArrayList<Element>>();
          columnMap.put(columnIndex, column);
        }
        column.add(cluster);
        columnIndex++;
      }
    }

    this.dataByColumn = columnMap;
    System.out.println("columMap: ");
    System.out.println(dataByColumn);
  }
Beispiel #2
0
  /**
   * Splits the lines of {@code table} into three groups according to how many clusters
   * (partitions) each line has, and stores the groups in the {@code titleAndHeaders}, {@code data}
   * and {@code rowSpanners} fields.
   *
   * <ul>
   *   <li>A line with two or more clusters is a data line.
   *   <li>Once at least one data line has been seen, a line without any clusters is held back as a
   *       pending gap line. At most two gap lines are held at a time. When the next data line
   *       arrives, the held gap lines are recorded as row spanners. When a third line without
   *       clusters arrives while two are already held, scanning stops.
   *   <li>Every other line with fewer than two clusters is added to the title/header list.
   * </ul>
   *
   * <p>Gap lines that are still pending when the loop ends are not stored anywhere.
   *
   * <p>NOTE(review): a line with exactly one cluster that appears after data has started is added
   * to the title/header list, not to the row spanners; only lines with zero clusters take the
   * row-spanner path. Confirm that this is intended.
   */
  private void separateDataByCluster() {
    ArrayList<Line> headerLines = new ArrayList<Line>();
    ArrayList<Line> dataLines = new ArrayList<Line>();
    ArrayList<Line> spanningLines = new ArrayList<Line>();
    Line firstGap = null;
    Line secondGap = null;
    boolean dataSeen = false;

    for (Line line : table) {
      int clusterCount = line.getClusters().size();

      // Lines without clusters after the first data line: hold them back, or stop on the third.
      if (clusterCount < 1 && dataSeen) {
        if (firstGap == null) {
          firstGap = line;
        } else if (secondGap == null) {
          secondGap = line;
        } else {
          break; // two gap lines already pending: end of the table
        }
        continue;
      }

      // Fewer than two clusters and not a gap line: title/header material.
      if (clusterCount < 2) {
        headerLines.add(line);
        continue;
      }

      // From here on the line has at least two clusters, i.e. it is a data line.
      if (firstGap == null) {
        dataLines.add(line);
        dataSeen = true;
        continue;
      }

      // A data line following pending gap lines: the gaps were inside the table body.
      spanningLines.add(firstGap);
      if (secondGap != null) {
        spanningLines.add(secondGap);
        secondGap = null;
      }
      dataLines.add(line);
      firstGap = null;
    }

    this.titleAndHeaders = headerLines;
    this.data = dataLines;
    this.rowSpanners = spanningLines;
  }
Beispiel #3
0
 /**
  * Adds the clusters of lines that were flagged as having missing data to the existing columns
  * and records a scored cell for every match.
  *
  * <p>Every cluster of every line in {@code linesWithMissingData} is compared against every
  * column in {@code dataInColumns}. When the cluster matches a column, it is added to that column
  * and a {@code Cell} carrying the match score and the line number is recorded. The score is 3
  * when both {@code fitsInColumn} and {@code columnFitsIn} hold, 2 when exactly one of them
  * holds, and 1 when neither holds but {@code touchesColumn} does. A cluster is not limited to a
  * single column: it is added to every column it matches.
  *
  * <p>Finally the number of recorded cells and the cells themselves are passed to
  * {@code validation}.
  */
 private void addLinesWithMissingDataToColumns() {
   ArrayList<Cell> cellsWithMissingDataAdded = new ArrayList<Cell>();
   for (Line line : linesWithMissingData) {
     for (ArrayList<Element> cluster : line.getClusters()) {
       for (Column2 column : dataInColumns) {
         int score = missingDataMatchScore(column, cluster);
         if (score > 0) {
           column.addCell(cluster);
           cellsWithMissingDataAdded.add(new Cell(cluster, score, line.getLineNumber()));
         }
       }
     }
   }
   validation.setCellsWithMissingDataAdded(cellsWithMissingDataAdded.size());
   validation.setCellsWithMissingDataAddedScores(cellsWithMissingDataAdded);
 }

 /**
  * Rates how well a cluster's horizontal extent matches a column.
  *
  * @param column the column to compare against
  * @param cluster the cluster whose X1/X2 bounds are checked
  * @return 3 if both {@code fitsInColumn} and {@code columnFitsIn} hold, 2 if exactly one of them
  *     holds, 1 if neither holds but {@code touchesColumn} does, and 0 if there is no match
  */
 private int missingDataMatchScore(Column2 column, ArrayList<Element> cluster) {
   boolean fitsInColumn =
       column.fitsInColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster));
   boolean columnFitsIn =
       column.columnFitsIn(Line.getClusterX1(cluster), Line.getClusterX2(cluster));
   if (fitsInColumn && columnFitsIn) {
     return 3;
   }
   if (fitsInColumn || columnFitsIn) {
     return 2;
   }
   // Weakest match: only consulted when neither containment check succeeded.
   if (column.touchesColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
     return 1;
   }
   return 0;
 }