Exemple #1
0
 /**
  * Regroups the words of the given page lines by vertical position (y coordinate)
  * in page space.  Despite the method name, no reordering takes place: the words of
  * each incoming line are walked in their existing order and a new line is started
  * whenever the y coordinate jumps by more than half the height of the current word.
  *
  * @param pageLines page lines to regroup; the list is only read, the regrouped
  *                  lines are collected in a newly created list.
  * @return new list holding the regrouped lines, in encounter order.  Incoming
  *         lines whose word list is null or empty contribute nothing.
  */
 private ArrayList<LineText> sortLinesVertically(ArrayList<LineText> pageLines) {
   ArrayList<LineText> regrouped = new ArrayList<LineText>(64);
   for (LineText sourceLine : pageLines) {
     java.util.List<WordText> lineWords = sourceLine.getWords();
     // a line without any words has nothing to contribute
     if (lineWords == null || lineWords.size() < 1) {
       continue;
     }
     // the first word's (rounded) y is the baseline for the first comparison
     double previousY = Math.round(lineWords.get(0).getTextExtractionBounds().y);
     // [runStart, position) is the run of words collected for the current line
     int runStart = 0;
     int position = 0;
     for (WordText candidate : lineWords) {
       double candidateY = Math.round(candidate.getTextExtractionBounds().getY());
       double delta = Math.abs(candidateY - previousY);
       // Tolerance for line detection: only a vertical jump larger than half of
       // the current word's height counts as a break.  This copes well enough
       // with sub/super script and with inconsistencies in table based text.
       boolean startsNewLine = delta != 0
           && delta > candidate.getTextExtractionBounds().getHeight() / 2;
       if (startsNewLine) {
         // close the run collected so far; the current word opens the next run
         LineText completed = new LineText();
         completed.addAll(lineWords.subList(runStart, position));
         regrouped.add(completed);
         runStart = position;
       }
       previousY = candidateY;
       position++;
     }
     // flush whatever is left after the last detected break
     if (position > runStart) {
       LineText trailing = new LineText();
       trailing.addAll(lineWords.subList(runStart, position));
       regrouped.add(trailing);
     }
   }
   return regrouped;
 }