/**
 * Searches the zone's text for a day/month/year match and, when one is found,
 * forwards the extracted parts to the four-argument
 * {@code enhanceMetadata(metadata, day, month, year)}.
 *
 * <p>{@code simplePattern} is tried first; {@code anotherPattern} is only tried
 * when the first one finds nothing. The two patterns differ in where the day and
 * the twelve month groups sit; both expose the year as group 15.
 *
 * @param zone     zone whose text is searched
 * @param metadata metadata object handed on to the four-argument variant
 * @return {@code true} if either pattern matched, {@code false} otherwise
 */
@Override
protected boolean enhanceMetadata(BxZone zone, DocumentMetadata metadata) {
    Matcher simple = simplePattern.matcher(zone.toText());
    if (simple.find()) {
        // simplePattern layout: day = group 2, months = groups 3..14.
        forwardMatchedDate(metadata, simple.toMatchResult(), 2, 3);
        return true;
    }

    Matcher another = anotherPattern.matcher(zone.toText());
    if (another.find()) {
        // anotherPattern layout: months = groups 2..13, day = group 14.
        forwardMatchedDate(metadata, another.toMatchResult(), 14, 2);
        return true;
    }

    return false;
}

/**
 * Reads day, month and year from a match and passes them on.
 *
 * <p>The month is the 1-based position of the first non-null group among the
 * twelve consecutive groups starting at {@code firstMonthGroup}; it stays
 * {@code null} when none of them participated in the match.
 */
private void forwardMatchedDate(DocumentMetadata metadata, MatchResult match,
        int dayGroup, int firstMonthGroup) {
    String day = match.group(dayGroup);

    String month = null;
    int offset = 0;
    while (month == null && offset < 12) {
        if (match.group(firstMonthGroup + offset) != null) {
            month = String.valueOf(offset + 1);
        }
        offset++;
    }

    String year = match.group(15);
    enhanceMetadata(metadata, day, month, year);
}
/**
 * Binary feature: 1 when the zone's text contains at least one character in
 * U+0391..U+03A9 or U+03B1..U+03C9 (the Greek capital and small letter ranges),
 * 0 otherwise.
 *
 * <p>The text is scanned character by character instead of using
 * {@code String.matches("^.*[...].*$")}: {@code matches} requires the whole
 * input to match and {@code .} does not match line terminators, so the regex
 * form reported 0 for any text containing a line break, even when Greek
 * letters were present.
 *
 * @param zone zone whose text is inspected
 * @param page page context (not used by this feature)
 * @return 1 if a character from either range occurs in the text, else 0
 */
@Override
public double calculateFeatureValue(BxZone zone, BxPage page) {
    String text = zone.toText();
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        boolean inCapitalRange = c >= 0x0391 && c <= 0x03A9;
        boolean inSmallRange = c >= 0x03B1 && c <= 0x03C9;
        if (inCapitalRange || inSmallRange) {
            return 1;
        }
    }
    return 0;
}
/**
 * Returns the median length of the tokens obtained by splitting the zone's text
 * on single whitespace characters.
 *
 * <p>Because the split is on one whitespace character at a time, consecutive or
 * leading whitespace produces empty tokens, which are counted with length 0.
 * For an even number of tokens the upper of the two middle values is returned.
 *
 * @param object  zone whose text is analysed
 * @param context page context (not used by this feature)
 * @return the median token length, or 0 when the text yields no tokens
 */
@Override
public double calculateFeatureValue(BxZone object, BxPage context) {
    String[] words = object.toText().split("\\s");

    // split() drops trailing empty strings, so whitespace-only text yields an
    // empty array; without this guard the median lookup below would throw
    // IndexOutOfBoundsException.
    if (words.length == 0) {
        return 0;
    }

    List<Integer> wordLengths = new ArrayList<Integer>(words.length);
    for (String word : words) {
        wordLengths.add(word.length());
    }
    Collections.sort(wordLengths);

    // Middle element of the sorted lengths (upper median for even sizes).
    return wordLengths.get(wordLengths.size() / 2);
}
@Override public double calculateFeatureValue(BxZone zone, BxPage page) { String[] keywords = {"abstract", "keywords", "key words" // , "background", "methods", "results", "conclusions", "purpose", "trial", "discussion", // "summary", "conclusion" }; for (String keyword : keywords) { if (zone.toText().toLowerCase().startsWith(keyword)) { return 1; } } return 0; }