/**
 * Matches a pattern against the text formed by a sequence of text positions.
 *
 * <p>The character value of every non-null position is concatenated in list order, and the
 * resulting string is handed, together with the original list and the pattern, to the
 * three-argument {@code match} overload.
 *
 * @param textPositions the positions to search; null entries contribute no text
 * @param pattern the pattern to look for
 * @return whatever the three-argument {@code match} overload returns
 */
public List<Match> match(List<TextPosition> textPositions, final Pattern pattern) {
  final StringBuilder text = new StringBuilder(textPositions.size() * 2);
  for (TextPosition current : textPositions) {
    if (current == null) {
      continue;
    }
    text.append(current.getCharacter());
  }
  final String joined = text.toString();
  return match(textPositions, joined, pattern);
}
 private String getFont(List<TextPosition> word) {
   String baseFont = "";
   for (TextPosition position : word) {
     baseFont = position.getFont().getBaseFont();
   }
   return baseFont;
 }
  private float[] getLineStartEnd(List<TextPosition> line) {
    TextPosition first = line.get(0);
    TextPosition last = line.get(line.size() - 1);

    float[] borderArr = new float[2];
    borderArr[0] = first.getXDirAdj();
    borderArr[1] = last.getXDirAdj();

    return borderArr;
  }
  // Example #4
  // 0
  /**
   * Appends the character value of a text position to the current region buffer.
   *
   * <p>When trace logging is enabled the character value is logged, wrapped in square brackets,
   * before the region check is made.
   *
   * @param aText the text position whose character value is appended
   * @throws IOException declared by the signature; this body does not throw it explicitly
   * @throws IllegalStateException if {@code regionText} is null
   */
  @Override
  protected void writeCharacters(final TextPosition aText) throws IOException {
    final boolean tracing = log.isTraceEnabled();
    if (tracing) {
      log.trace("[" + aText.getCharacter() + "]");
    }

    if (regionText != null) {
      regionText.append(aText.getCharacter());
      return;
    }
    throw new IllegalStateException("No region started");
  }
  /**
   * Stores every character of a word by calling {@code db.insertWordCharacter} once per position.
   *
   * <p>Each call receives the converted form of the character, its {@code getXDirAdj()} value
   * (twice), the id in {@code lastInsertedWordId}, and the bold / italic / font attributes
   * computed from a one-element view holding just that character.
   *
   * @param word the positions that make up the word, in order
   */
  private void getTheCharacters(List<TextPosition> word) {
    for (int i = 0; i < word.size(); i++) {
      final TextPosition cha = word.get(i);

      // subList's upper bound is exclusive: (i, i) would be an empty view, which makes the
      // style and font lookups below ignore the character altogether. (i, i + 1) is the
      // one-element view of this character.
      final List<TextPosition> single = word.subList(i, i + 1);
      db.insertWordCharacter(
          rf.convertToCorrectForm(cha),
          cha.getXDirAdj(),
          // NOTE(review): the same X value is passed a second time, exactly as before;
          // presumably these are start/end coordinates - confirm whether the second one
          // should include the glyph width.
          cha.getXDirAdj(),
          lastInsertedWordId,
          rf.isBold(single),
          rf.isItalic(single),
          getFont(single));
    }
  }
  /**
   * Splits a line of text positions into words and stores each word and its characters.
   *
   * <p>A word ends at a position whose converted form is blank after trimming, or at the last
   * position of the line. Each finished word is passed to {@code db.insertWord}; the returned id
   * is kept in {@code lastInsertedWordId} and the word's positions are then handed to
   * {@code getTheCharacters}.
   *
   * <p>NOTE(review): two consecutive blank positions yield a word with empty text and an empty
   * position list, because the second blank closes a word that started on itself - confirm
   * whether that is intended.
   *
   * @param line the positions of one line, in order
   */
  private void getTheWords(List<TextPosition> line) {
    float wordStart = 0;
    float wordEnd = 0;
    // Set after a word has been closed; tells the next iteration to open a new word.
    boolean isNextStart = false;
    // Accumulates the converted text of the word currently being read.
    StringBuilder builder = new StringBuilder();
    // Index in line of the first position of the current word.
    int startIndex = 0;

    for (int i = 0; i < line.size(); i++) {
      TextPosition pos = line.get(i);
      // The very first position always opens a word.
      if (i == 0) {
        wordStart = pos.getXDirAdj();
        builder.delete(0, builder.length());
        startIndex = i;
      }

      // The previous position closed a word: start a fresh one here. Clearing the builder
      // also discards the separator text appended at the end of the previous iteration.
      if (isNextStart == true) {
        isNextStart = false;
        wordStart = pos.getXDirAdj();
        builder.delete(0, builder.length());
        startIndex = i;
      }

      // A blank position or the end of the line closes the current word.
      if (rf.convertToCorrectForm(pos).trim().equals("") || i == (line.size() - 1)) {
        if (i == (line.size() - 1)) {
          // The last position belongs to the word: add its text and advance i so that the
          // exclusive upper bound of subList below includes it. This also ends the loop.
          builder.append(rf.convertToCorrectForm(pos));
          i++;
        }
        isNextStart = true;
        // The word end is the X value of the closing position itself (the separator, or the
        // last position of the line).
        wordEnd = pos.getXDirAdj();
        List<TextPosition> word = line.subList(startIndex, i);
        lastInsertedWordId =
            db.insertWord(
                builder.toString(),
                wordStart,
                wordEnd,
                lastInsertedWorkLineId,
                rf.isBold(word),
                rf.isItalic(word),
                getFont(word));
        getTheCharacters(word);
      }

      // Runs for every position, including the closing one; after a close the builder is either
      // cleared at the start of the next iteration or never read again (end of line).
      builder.append(rf.convertToCorrectForm(pos));
    }
  }
  // Example #7
  // 0
 /**
  * Event hook called for a text position; this implementation prints a one-line description of
  * it to standard output.
  *
  * <p>The line shows, in this order, the values of {@code getXDirAdj()}, {@code getYDirAdj()},
  * {@code getFontSize()}, {@code getXScale()}, {@code getHeightDir()},
  * {@code getWidthOfSpace()} and {@code getWidthDirAdj()} inside square brackets, followed by
  * {@code getCharacter()}.
  *
  * @param text The text to be processed
  */
 protected void processTextPosition(TextPosition text) {
   final String location = text.getXDirAdj() + "," + text.getYDirAdj();
   final String metrics =
       " fs="
           + text.getFontSize()
           + " xscale="
           + text.getXScale()
           + " height="
           + text.getHeightDir()
           + " space="
           + text.getWidthOfSpace()
           + " width="
           + text.getWidthDirAdj();
   System.out.println("String[" + location + metrics + "]" + text.getCharacter());
 }
  /**
   * Computes a series of bounding boxes (PDRectangle) from a list of TextPositions. A new
   * bounding box is started whenever the Y position of a text position differs from the Y
   * position at which the current box was started by more than the vertical tolerance.
   *
   * <p>Null entries in the list are skipped. While positions stay on the same line, the upper
   * right corner of the current box follows the most recent position.
   *
   * @param positions the text positions to cover, in reading order; may contain null entries
   * @return the bounding boxes in the order they were completed; empty when the list holds no
   *     non-null position
   */
  public List<PDRectangle> getTextBoundingBoxes(final List<TextPosition> positions) {
    final List<PDRectangle> boundingBoxes = new ArrayList<PDRectangle>();

    float lowerLeftX = 0, lowerLeftY = 0, upperRightX = 0, upperRightY = 0;
    // True once at least one non-null position has opened a box. Tracking this explicitly,
    // instead of comparing the coordinates with a -1 sentinel, keeps a genuine zero-size
    // glyph at (-1, -1) from being mistaken for "no text seen".
    boolean lineOpen = false;
    for (final TextPosition position : positions) {
      if (position == null) {
        continue;
      }
      final Matrix textPos = position.getTextPos();
      final float height = position.getHeight() * getHeightModifier();
      final float x = textPos.getXPosition();
      final float y = textPos.getYPosition();

      // Still on the same line: extend the current box to this position.
      if (lineOpen && Math.abs(y - lowerLeftY) <= getVerticalTolerance()) {
        upperRightX = x + position.getWidth();
        upperRightY = y + height;
        continue;
      }

      // Line change: emit the finished box before opening the next one.
      if (lineOpen) {
        boundingBoxes.add(boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY));
      }

      // Open a new box at this position.
      lowerLeftX = x;
      upperRightX = x + position.getWidth();
      lowerLeftY = y;
      upperRightY = y + height;
      lineOpen = true;
    }

    // Emit the box that was still open when the input ended.
    if (lineOpen) {
      boundingBoxes.add(boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY));
    }
    return boundingBoxes;
  }
 /**
  * Adds the character value of a text position, together with the position itself, to the
  * text cache.
  *
  * @param text The text position to record in the cache.
  */
 @Override
 protected void writeCharacters(final TextPosition text) {
   textCache.append(text.getCharacter(), text);
 }