/** Serializes the given color. */ protected String serializeColor(PdfColor color) { if (color == null) { return null; } String id = color.getId(); if (id == null) { return null; } List<String> fields = new ArrayList<>(); // Add feature name. fields.add("color"); // Add id. fields.add(id); // Add rgb fields.add(String.valueOf(color.getR())); fields.add(String.valueOf(color.getG())); fields.add(String.valueOf(color.getB())); return CollectionUtils.join(fields, "\t"); }
/** Serializes the given font. */ protected String serializeFont(PdfFont font) { if (font == null) { return null; } String id = font.getId(); if (id == null) { return null; } List<String> fields = new ArrayList<>(); // Add feature name. fields.add("font"); // Add id. fields.add(id); // Add basename String basename = font.getBasename(); fields.add(basename != null ? basename : ""); fields.add(font.isBold() ? "1" : "0"); fields.add(font.isItalic() ? "1" : "0"); fields.add(font.isType3Font() ? "1" : "0"); return CollectionUtils.join(fields, "\t"); }
/** Serializes the given (non-text) element. */ protected String serializeNonTextElement(PdfElement element) { if (element == null) { return null; } List<String> fields = new ArrayList<>(); // Add feature name. PdfFeature feature = element.getFeature(); fields.add(feature != null ? feature.getFieldName() : ""); // Add bounding box. Rectangle rect = element.getRectangle(); fields.add(rect != null ? String.valueOf(rect.getMinX()) : ""); fields.add(rect != null ? String.valueOf(rect.getMinY()) : ""); fields.add(rect != null ? String.valueOf(rect.getMaxX()) : ""); fields.add(rect != null ? String.valueOf(rect.getMaxY()) : ""); // Add color. PdfColor color = element.getColor(); fields.add(color != null ? color.getId() : ""); // Add role. PdfRole role = element.getRole(); fields.add(role != null ? role.name : ""); return CollectionUtils.join(fields, "\t"); }
/**
 * Returns the header line, that is: the column names joined by tabs.
 *
 * <p>The order of the columns corresponds to the order in which
 * {@code serializeTextElement} appends its fields.
 *
 * @return the tab-separated header line.
 */
protected String getHeader() {
  return CollectionUtils.join(
      new String[] {
        CONTEXT_NAME_FEATURE,
        CONTEXT_NAME_ELEMENT_TEXT,
        CONTEXT_NAME_ELEMENT_PAGE,
        // Bounding box.
        CONTEXT_NAME_ELEMENT_MIN_X,
        CONTEXT_NAME_ELEMENT_MIN_Y,
        CONTEXT_NAME_ELEMENT_MAX_X,
        CONTEXT_NAME_ELEMENT_MAX_Y,
        // Most common font, font size and color.
        CONTEXT_NAME_ELEMENT_MOST_COMMON_FONT,
        CONTEXT_NAME_ELEMENT_MOST_COMMON_FONT_SIZE,
        CONTEXT_NAME_ELEMENT_MOST_COMMON_COLOR,
        // Font, font size and color of the first character.
        CONTEXT_NAME_ELEMENT_FIRST_CHARACTER_FONT,
        CONTEXT_NAME_ELEMENT_FIRST_CHARACTER_FONTSIZE,
        CONTEXT_NAME_ELEMENT_FIRST_CHARACTER_COLOR,
        // Font, font size and color of the last character.
        CONTEXT_NAME_ELEMENT_LAST_CHARACTER_FONT,
        CONTEXT_NAME_ELEMENT_LAST_CHARACTER_FONTSIZE,
        CONTEXT_NAME_ELEMENT_LAST_CHARACTER_COLOR,
        CONTEXT_NAME_ELEMENT_ROLE
      },
      "\t");
}
/** Serializes the given text element. */ protected String serializeTextElement(PdfTextElement element) { if (element == null) { return null; } String text = element.getText( getSerializePunctuationMarks(), getSerializeSubscripts(), getSerializeSuperscripts()); if (text == null || text.trim().isEmpty()) { return null; } List<String> fields = new ArrayList<>(); // Add feature name. PdfFeature feature = element.getFeature(); fields.add(feature != null ? feature.getFieldName() : ""); // Add text. fields.add(text.replaceAll("\t", " ")); // Add page. PdfPage page = element.getPage(); fields.add(page != null ? String.valueOf(page.getPageNumber()) : ""); // Add bounding box. Rectangle rect = element.getRectangle(); fields.add(rect != null ? String.valueOf(rect.getMinX()) : ""); fields.add(rect != null ? String.valueOf(rect.getMinY()) : ""); fields.add(rect != null ? String.valueOf(rect.getMaxX()) : ""); fields.add(rect != null ? String.valueOf(rect.getMaxY()) : ""); // Add most common font. PdfFont font = element.getFont(); fields.add(font != null ? font.getId() : ""); fields.add(String.valueOf(element.getFontsize())); // Add most common color. PdfColor color = element.getColor(); fields.add(color != null ? color.getId() : ""); // Add font of first and last character. 
PdfFont firstCharacterFont = null; PdfFont lastCharacterFont = null; PdfColor firstCharacterColor = null; PdfColor lastCharacterColor = null; float firstCharacterFontsize = 0; float lastCharacterFontsize = 0; List<PdfCharacter> characters = element.getTextCharacters(); if (characters != null && !characters.isEmpty()) { Collections.sort(characters, new Comparators.MinXComparator()); PdfCharacter firstCharacter = characters.get(0); PdfCharacter lastCharacter = characters.get(characters.size() - 1); if (firstCharacter != null) { firstCharacterFont = firstCharacter.getFont(); firstCharacterFontsize = firstCharacter.getFontsize(); firstCharacterColor = firstCharacter.getColor(); } if (lastCharacter != null) { lastCharacterFont = lastCharacter.getFont(); lastCharacterFontsize = lastCharacter.getFontsize(); lastCharacterColor = lastCharacter.getColor(); } } // Append font and color of first character. fields.add(firstCharacterFont != null ? firstCharacterFont.getId() : ""); fields.add(String.valueOf(firstCharacterFontsize)); fields.add(firstCharacterColor != null ? firstCharacterColor.getId() : ""); // Append font and color of last character. fields.add(lastCharacterFont != null ? lastCharacterFont.getId() : ""); fields.add(String.valueOf(lastCharacterFontsize)); fields.add(lastCharacterColor != null ? lastCharacterColor.getId() : ""); // Add role. PdfRole role = element.getRole(); fields.add(role != null ? role.name : ""); return CollectionUtils.join(fields, "\t"); }
/**
 * Writes the given lines to the given output stream.
 *
 * <p>The lines are joined by newline characters and written as UTF-8 encoded bytes. The
 * stream is neither flushed nor closed by this method.
 *
 * @param lines the lines to write.
 * @param os the stream to write to.
 * @throws IOException if writing to the stream fails.
 */
protected void writeTo(List<String> lines, OutputStream os) throws IOException {
  byte[] payload = CollectionUtils.join(lines, "\n").getBytes(StandardCharsets.UTF_8);
  os.write(payload);
}