Java WordText Examples

Programming Language: Java

Class/Type: WordText

Examples at hotexamples.com: 4

Java WordText - 4 examples found. These are the top rated real world Java examples of WordText extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getText(2)

clearBounds(1)

getBounds(1)

getGlyphs(1)

getTextExtractionBounds(1)

Example #1

Show file

File: PageText.java Project: svn2github/icepdf

  /**
   * Takes the raw page lines represented as one continuous line and sorts the text by the y axis
   * of the word bounds. The words are then sliced into separate lines based on y changes. And
   * finally each newly sorted line is sorted once more by each word's x coordinate.
   */
  public void sortAndFormatText() {
    // Work on a copy of the raw page lines and regroup their words into lines by y position.
    ArrayList<LineText> lines = sortLinesVertically(new ArrayList<LineText>(pageLines));

    // With no lines to work on, fall back to the visible page lines; otherwise try to place
    // the optional words on the lines that already exist.
    if (lines.isEmpty()) {
      lines = getVisiblePageLines(true);
    } else {
      insertOptionalLines(lines);
    }

    // Regroup a second time, since the previous step may have changed the line content.
    lines = sortLinesVertically(lines);

    // Rough pass that drops duplicate strings sometimes generated by Chrystal Reports.
    // Enable with -Dorg.icepdf.core.views.page.text.trim.duplicates=true
    if (checkForDuplicates) {
      for (final LineText line : lines) {
        final List<WordText> lineWords = line.getWords();
        if (lineWords.isEmpty()) {
          continue;
        }
        final Set<String> seenKeys = new HashSet<String>();
        final List<WordText> uniqueWords = new ArrayList<WordText>();
        for (final WordText word : lineWords) {
          // A word is identified by its text plus its bounds; the nested getBounds() is meant
          // to yield a regular rectangle so that a little rounding is applied to the position.
          final String key = word.getText() + word.getBounds().getBounds();
          final boolean firstOccurrence = seenKeys.add(key);
          if (firstOccurrence) {
            uniqueWords.add(word);
          }
        }
        line.setWords(uniqueWords);
      }
    }

    // Order the words of every line by x coordinate.
    for (LineText line : lines) {
      Collections.sort(line.getWords(), new WordPositionComparator());
    }

    // Recalculate the line bounds; the returned value itself is not needed here.
    for (LineText line : lines) {
      line.getBounds();
    }

    // Order the lines themselves, unless column layout has to be preserved.
    if (!preserveColumns && !lines.isEmpty()) {
      Collections.sort(lines, new LinePositionComparator());
    }

    // Publish the result.
    this.sortedPageLines = lines;
  }

Example #2

Show file

File: PageText.java Project: svn2github/icepdf

  /**
   * Builds a plain-text dump of the page from {@code pageLines}.
   *
   * <p>The text of every word is appended in list order exactly as returned by
   * {@code getText()}; no separator is inserted between words. A newline character is appended
   * after each line, including the last one. Note that this reads {@code pageLines}, not the
   * lines produced by {@code sortAndFormatText()}.
   *
   * @return the concatenated word text of all page lines, each line terminated by {@code '\n'}
   */
  @Override
  public String toString() {
    StringBuilder extractedText = new StringBuilder();
    for (LineText lineText : pageLines) {
      for (WordText wordText : lineText.getWords()) {
        extractedText.append(wordText.getText());
      }
      // terminate every line, also the final one
      extractedText.append('\n');
    }
    return extractedText.toString();
  }

Example #3

Show file

File: PageText.java Project: svn2github/icepdf

 /**
  * Utility method that normalizes text created in an Xform content stream; it is only called
  * from the contentParser when parsing a 'Do' token.
  *
  * @param transform do matrix transform
  */
 public void applyXObjectTransform(AffineTransform transform) {
   for (final LineText line : pageLines) {
     // Discard the bounds held by the line before touching its glyphs
     // (presumably so they are rebuilt from the normalized glyphs later - confirm).
     line.clearBounds();
     for (final WordText word : line.getWords()) {
       // Same for every word on the line.
       word.clearBounds();
       for (final GlyphText glyphText : word.getGlyphs()) {
         // Normalize each glyph with the given matrix; no second transform is supplied.
         glyphText.normalizeToUserSpace(transform, null);
       }
     }
   }
 }

Example #4

Show file

File: PageText.java Project: svn2github/icepdf

 /**
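  * Sorts the given pageLines vertically (y coordinate) in page space: the words of each line are
  * regrouped into a new line whenever the y coordinate changes by more than a small tolerance.
  *
  * @param pageLines page lines to sort, not directly sorted, new array is created for sorted data.
  * @return new array of sorted page lines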
  */
 private ArrayList<LineText> sortLinesVertically(ArrayList<LineText> pageLines) {
   ArrayList<LineText> regrouped = new ArrayList<LineText>(64);
   for (LineText source : pageLines) {
     // all page words are expected to sit on this one line
     java.util.List<WordText> words = source.getWords();
     if (words == null || words.size() == 0) {
       continue;
     }
     // Rounded y of the previously visited word. It is seeded from the first word, so the
     // first word itself can never trigger a break.
     double previousY = Math.round(words.get(0).getTextExtractionBounds().y);
     // index of the first word that has not been emitted as part of a line yet
     int lineStart = 0;
     for (int index = 0; index < words.size(); index++) {
       WordText word = words.get(index);
       double y = Math.round(word.getTextExtractionBounds().getY());
       double delta = Math.abs(y - previousY);
       // A little tolerance when detecting a new line: only a y change larger than half of
       // the current word's height counts as a break. This copes well enough with sub and
       // super script and with inconsistencies in table based text.
       boolean startsNewLine =
           delta != 0 && delta > word.getTextExtractionBounds().getHeight() / 2;
       if (startsNewLine) {
         // emit everything collected so far; the current word opens the next line
         LineText finished = new LineText();
         finished.addAll(words.subList(lineStart, index));
         regrouped.add(finished);
         lineStart = index;
       }
       // each word is compared against its direct predecessor, not against the line start
       previousY = y;
     }
     // flush the words that remain after the last break
     if (lineStart < words.size()) {
       LineText remainder = new LineText();
       remainder.addAll(words.subList(lineStart, words.size()));
       regrouped.add(remainder);
     }
   }
   return regrouped;
 }