/**
 * Matches the given pattern against the text formed by the supplied positions.
 *
 * <p>The characters of all non-null positions are concatenated in list order and the work is
 * delegated to the three-argument {@code match} overload together with the original list.
 *
 * <p>NOTE(review): null entries are left out of the text but remain in the list handed to the
 * overload, so string indices and list indices may differ - confirm the overload expects this.
 *
 * @param textPositions the positions supplying the characters; entries may be null
 * @param pattern the pattern forwarded to the overload
 * @return whatever the three-argument overload returns
 */
public List<Match> match(List<TextPosition> textPositions, final Pattern pattern) {
  final StringBuilder text = new StringBuilder(textPositions.size() * 2);
  for (final TextPosition position : textPositions) {
    if (position == null) {
      continue;
    }
    text.append(position.getCharacter());
  }
  final String joined = text.toString();
  return match(textPositions, joined, pattern);
}
/**
 * Returns the base font name of the last position in the given word.
 *
 * <p>Every position's font is dereferenced in order; each result overwrites the previous one,
 * so only the last one is returned.
 *
 * @param word the positions to inspect
 * @return the base font of the last position, or an empty string when the list is empty
 */
private String getFont(List<TextPosition> word) {
  String lastBaseFont = "";
  for (int index = 0; index < word.size(); index++) {
    lastBaseFont = word.get(index).getFont().getBaseFont();
  }
  return lastBaseFont;
}
/**
 * Returns the direction-adjusted X coordinates of the first and last position of a line.
 *
 * @param line the positions of one line; must contain at least one element
 * @return a two-element array: index 0 holds the X of the first position, index 1 the X of the
 *     last position
 */
private float[] getLineStartEnd(List<TextPosition> line) {
  final TextPosition head = line.get(0);
  final TextPosition tail = line.get(line.size() - 1);
  return new float[] {head.getXDirAdj(), tail.getXDirAdj()};
}
/**
 * Appends the character of the given text position to {@code regionText}.
 *
 * <p>When trace logging is enabled the character is logged in square brackets first, before
 * the region check is made.
 *
 * @param aText the text position whose character is appended
 * @throws IllegalStateException if {@code regionText} is null
 * @throws IOException declared by the signature; not thrown explicitly by this body
 */
@Override
protected void writeCharacters(final TextPosition aText) throws IOException {
  if (log.isTraceEnabled()) {
    log.trace("[" + aText.getCharacter() + "]");
  }
  if (regionText != null) {
    regionText.append(aText.getCharacter());
    return;
  }
  throw new IllegalStateException("No region started");
}
/**
 * Stores every position of a word as a separate character record via
 * {@code db.insertWordCharacter}, linked to {@code lastInsertedWordId}.
 *
 * <p>The bold, italic and font arguments are computed from a one-element view containing just
 * the current position. The view is {@code subList(i, i + 1)} because the end index of
 * {@link List#subList(int, int)} is exclusive; {@code subList(i, i)} would always be empty and
 * the style helpers would never see the glyph.
 *
 * @param word the positions making up one word
 */
private void getTheCharacters(List<TextPosition> word) {
  for (int i = 0; i < word.size(); i++) {
    TextPosition cha = word.get(i);
    // One-element view of the current glyph (end index is exclusive).
    List<TextPosition> single = word.subList(i, i + 1);
    // NOTE(review): the same X value is passed for both coordinate arguments - confirm
    // whether the second one is meant to be the glyph's end position.
    db.insertWordCharacter(
        rf.convertToCorrectForm(cha),
        cha.getXDirAdj(),
        cha.getXDirAdj(),
        lastInsertedWordId,
        rf.isBold(single),
        rf.isItalic(single),
        getFont(single));
  }
}
/**
 * Splits one line of positions into words and stores each word via {@code db.insertWord},
 * followed by its characters via {@code getTheCharacters}.
 *
 * <p>A word ends at a position whose converted text trims to the empty string, or at the last
 * position of the line. For each word the accumulated text, its start X, its end X,
 * {@code lastInsertedWorkLineId} and the bold/italic/font values computed from the word's
 * sublist are passed to {@code db.insertWord}; the returned value is kept in
 * {@code lastInsertedWordId}.
 *
 * <p>NOTE(review): a blank position directly after another blank one (or at index 0) produces
 * a word with empty text and an empty sublist - confirm this is intended.
 *
 * <p>NOTE(review): when the last position itself is blank, its text is appended to the word and
 * it is included in the sublist - confirm this is intended.
 *
 * @param line the positions of one line, in order
 */
private void getTheWords(List<TextPosition> line) {
  float wordStart = 0;
  float wordEnd = 0;
  // Set after a word was stored; the next position then starts a new word.
  boolean isNextStart = false;
  StringBuilder builder = new StringBuilder();
  // Index in line of the first position of the word being collected.
  int startIndex = 0;
  for (int i = 0; i < line.size(); i++) {
    TextPosition pos = line.get(i);
    // The very first position always starts a word.
    if (i == 0) {
      wordStart = pos.getXDirAdj();
      builder.delete(0, builder.length());
      startIndex = i;
    }
    // Previous iteration closed a word: reset the text buffer and start markers here.
    if (isNextStart == true) {
      isNextStart = false;
      wordStart = pos.getXDirAdj();
      builder.delete(0, builder.length());
      startIndex = i;
    }
    // A word ends at a blank position or at the end of the line.
    if (rf.convertToCorrectForm(pos).trim().equals("") || i == (line.size() - 1)) {
      if (i == (line.size() - 1)) {
        // Last position: add its text now, and advance i so that the exclusive end index of
        // subList below includes it. This also makes the loop condition fail afterwards.
        builder.append(rf.convertToCorrectForm(pos));
        i++;
      }
      isNextStart = true;
      // End X is the X of the position that closed the word (separator or last position).
      wordEnd = pos.getXDirAdj();
      List<TextPosition> word = line.subList(startIndex, i);
      lastInsertedWordId =
          db.insertWord(
              builder.toString(),
              wordStart,
              wordEnd,
              lastInsertedWorkLineId,
              rf.isBold(word),
              rf.isItalic(word),
              getFont(word));
      getTheCharacters(word);
    }
    // Runs for every position, including after a word was stored; in that case the buffer is
    // cleared again by the isNextStart branch on the next iteration (or the loop has ended).
    builder.append(rf.convertToCorrectForm(pos));
  }
}
/**
 * Text-processing hook that subclasses may override to react to a piece of text.
 *
 * <p>This implementation prints a single diagnostic line to standard output containing the
 * position's direction-adjusted X and Y, font size, X scale, height, width of space, width and
 * finally its character.
 *
 * @param text The text to be processed
 */
protected void processTextPosition(TextPosition text) {
  final String location = "String[" + text.getXDirAdj() + "," + text.getYDirAdj();
  final String fontMetrics =
      " fs=" + text.getFontSize() + " xscale=" + text.getXScale() + " height=" + text.getHeightDir();
  final String widths = " space=" + text.getWidthOfSpace() + " width=" + text.getWidthDirAdj();
  System.out.println(location + fontMetrics + widths + "]" + text.getCharacter());
}
/** * Computes a series of bounding boxes (PDRectangle) from a list of TextPositions. It will create * a new bounding box if the vertical tolerance is exceeded * * @param positions * @throws IOException */ public List<PDRectangle> getTextBoundingBoxes(final List<TextPosition> positions) { final List<PDRectangle> boundingBoxes = new ArrayList<PDRectangle>(); float lowerLeftX = -1, lowerLeftY = -1, upperRightX = -1, upperRightY = -1; boolean first = true; for (int i = 0; i < positions.size(); i++) { final TextPosition position = positions.get(i); if (position == null) { continue; } final Matrix textPos = position.getTextPos(); final float height = position.getHeight() * getHeightModifier(); if (first) { lowerLeftX = textPos.getXPosition(); upperRightX = lowerLeftX + position.getWidth(); lowerLeftY = textPos.getYPosition(); upperRightY = lowerLeftY + height; first = false; continue; } // we are still on the same line if (Math.abs(textPos.getYPosition() - lowerLeftY) <= getVerticalTolerance()) { upperRightX = textPos.getXPosition() + position.getWidth(); upperRightY = textPos.getYPosition() + height; } else { final PDRectangle boundingBox = boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY); boundingBoxes.add(boundingBox); // new line lowerLeftX = textPos.getXPosition(); upperRightX = lowerLeftX + position.getWidth(); lowerLeftY = textPos.getYPosition(); upperRightY = lowerLeftY + height; } } if (!(lowerLeftX == -1 && lowerLeftY == -1 && upperRightX == -1 && upperRightY == -1)) { final PDRectangle boundingBox = boundingBox(lowerLeftX, lowerLeftY, upperRightX, upperRightY); boundingBoxes.add(boundingBox); } return boundingBoxes; }
/**
 * Appends the character of the given TextPosition, together with the position itself, to the
 * text cache.
 *
 * @param text The text to write to the stream.
 */
@Override
protected void writeCharacters(final TextPosition text) {
  textCache.append(text.getCharacter(), text);
}