/**
 * Feeds the value of every given field into {@code messageDigest}.
 *
 * <p>Each value is converted to bytes using {@code CHARSET} and passed to the
 * digest in list order. The digest is only updated here; it is neither reset
 * nor finalized by this method.
 *
 * @param doc         the document being processed (not used by this method)
 * @param fieldName   the name of the field (not used by this method)
 * @param fieldValues the field values whose text is added to the digest
 * @throws PipelineException declared by the signature; nothing in this body
 *                           throws it directly
 */
@Override
protected void process(Document doc, String fieldName, List<AnnotatedField> fieldValues)
        throws PipelineException {
    for (AnnotatedField fieldValue : fieldValues) {
        // NOTE(review): a null value would raise a NullPointerException on
        // getBytes - confirm callers never pass fields without a value.
        final String value = fieldValue.getValue();
        final byte[] encoded = value.getBytes(CHARSET);
        messageDigest.update(encoded);
    }
}
/**
 * Splits the text of a field into sentence spans and attaches them to the
 * field under the {@code SENTENCE} key.
 *
 * <p>A span starts at the first non-whitespace character following the
 * previous boundary. It is closed at a character contained in
 * {@code CHARS_SENT} when that character is more than one position past the
 * span start and is immediately followed by whitespace. Whatever remains
 * after the last boundary becomes a final span, provided it starts before
 * the last character of the text.
 *
 * <p>If the field has no value, only a debug message is logged and nothing
 * is added to the field.
 *
 * @param fieldName the field name, used only in log messages
 * @param field     the field whose value is scanned and annotated
 */
private static void process(String fieldName, AnnotatedField field) {
    final String text = field.getValue();
    if (text == null) {
        log.debug("Field '{}' - null", fieldName);
        return;
    }

    final int length = text.length();
    final List<Annotation> sentences = new ArrayList<Annotation>();
    int start = -1; // -1 means "currently between sentences"

    for (int pos = 0; pos < length; pos++) {
        final char current = text.charAt(pos);

        if (start < 0) {
            if (isWhiteSpace(current)) {
                // Still skipping the whitespace that precedes the next sentence.
                continue;
            }
            start = pos;
        }

        // Arrays.binarySearch requires a sorted array - presumably CHARS_SENT
        // is kept sorted where it is declared; verify there.
        final boolean boundary = Arrays.binarySearch(CHARS_SENT, current) >= 0
                && pos - start > 1
                && pos + 1 < length
                && isWhiteSpace(text.charAt(pos + 1));
        if (boundary) {
            // NOTE(review): the span ends at the terminator's index here, but the
            // trailing span below ends at text.length() - confirm the offset
            // convention expected by BaseAnnotation.
            sentences.add(new BaseAnnotation(start, pos));
            start = -1;
        }
    }

    // Remaining text that was never closed by a terminator followed by whitespace.
    if (start >= 0 && start + 1 < length) {
        sentences.add(new BaseAnnotation(start, length));
    }

    log.debug("Field '{}' sentence annotations: {}", fieldName, sentences.size());
    field.add(SENTENCE, sentences);
}