Beispiel #1
0
 private void replaceMissPlacedCells() {
   for (ArrayList<Element> cluster : missPlacedCells) {
     for (Column2 column : dataInColumns) {
       if (column.fitsInColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))
           && column.columnFitsIn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
         // then we need to add this cluster to that column:
         column.addCell(cluster);
       } else if (column.fitsInColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))
           || column.columnFitsIn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
         // then we need to add this cluster to that column:
         column.addCell(cluster);
       } else if (column.touchesColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
         //                    System.out.println("CLUSTER: " + cluster + " touches: " + column);
         column.addCell(cluster);
       }
     }
   }
 }
Beispiel #2
0
 /**
  * This method adds lines with missing partitions to the current columns. It loops trough lines
  * that were flagged for containing missing data and then adds the ones to columns that they fit
  * in (or to columns that fit in the partition). If this fails it will also try to check if the
  * partition merely touches a column, although this will return a lower validation score if
  * successful.
  */
 private void addLinesWithMissingDataToColumns() {
   ArrayList<Cell> cellsWithMissingDataAdded = new ArrayList<Cell>();
   ArrayList<Column2> newDataInColumns = new ArrayList<Column2>();
   for (Line line : linesWithMissingData) {
     ArrayList<ArrayList<Element>> clusters = line.getClusters();
     for (ArrayList<Element> cluster : clusters) {
       for (Column2 column : dataInColumns) {
         if (column.fitsInColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))
             && column.columnFitsIn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
           // then we need to add this cluster to that column:
           newDataInColumns.remove(column);
           column.addCell(cluster);
           newDataInColumns.add(column);
           Cell cell = new Cell(cluster, 3, line.getLineNumber());
           cellsWithMissingDataAdded.add(cell);
         } else if (column.fitsInColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))
             || column.columnFitsIn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
           // then we need to add this cluster to that column:
           newDataInColumns.remove(column);
           column.addCell(cluster);
           newDataInColumns.add(column);
           Cell cell = new Cell(cluster, 2, line.getLineNumber());
           cellsWithMissingDataAdded.add(cell);
         } else if (column.touchesColumn(Line.getClusterX1(cluster), Line.getClusterX2(cluster))) {
           //                        System.out.println("CLUSTER: " + cluster + " touches: " +
           // column);
           newDataInColumns.remove(column);
           column.addCell(cluster);
           newDataInColumns.add(column);
           Cell cell = new Cell(cluster, 1, line.getLineNumber());
           cellsWithMissingDataAdded.add(cell);
         }
       }
     }
   }
   validation.setCellsWithMissingDataAdded(cellsWithMissingDataAdded.size());
   validation.setCellsWithMissingDataAddedScores(cellsWithMissingDataAdded);
 }
Beispiel #3
0
  /**
   * This method is used for extraction of tables with lot of empty cells in it. It is required for
   * the successful extraction of most Matrix tables.
   */
  private void fillBlankCells() {
    // We say: cells get a line number. If a column does not contain a cell on a certain line, add a
    // whitespace.
    // Any cell that is not filled must be empty:
    for (Line line : data) {
      int lineNumber = line.getLineNumber();
      COLUMNLOOP:
      for (Column2 column : dataInColumns) {
        for (Cell cell : column.getCellObjects()) {
          if (cell.getLineNumber() == lineNumber) {
            break;
          }
          if (cell.getLineNumber() > line.getLineNumber()) { // the last cell?
            // Add a blank cell to this column.
            //                        System.out.println("Add line to :" + column + " in line: " +
            // line.getLineNumber());

            // <span class='ocrx_word' id='word_9' title="bbox 2175 514 2346 555">were</span>

            Tag t = Tag.valueOf("span");
            Attributes attributes = new Attributes();
            attributes.put("class", "ocrx_word");
            attributes.put("id", "word_ADDEDBYTEA");
            attributes.put(
                "title",
                "bbox "
                    + column.getAverageX1()
                    + " "
                    + (int) line.getAverageY1()
                    + " "
                    + column.getAverageX2()
                    + " "
                    + (int) line.getAverageY2());

            Element newElement = new Element(t, "localhost:8080", attributes);
            newElement.text(" ");
            ArrayList<Element> newCell = new ArrayList<Element>();
            newCell.add(newElement);
            //                        System.out.println("adding: " +newElement.text());
            column.addCell(newCell);
            break COLUMNLOOP;
          }
        }
      }
    }
  }