/**
 * Normalizes a raw name into the form used for article names.
 *
 * <p>The value is run through a fixed chain of helpers, in this order:
 * {@code StringUtils.trim}, {@code RegexUtils.trim(..., "_")},
 * {@code StringUtils.trim} again, and finally {@code StringUtils.ucfirst}.
 *
 * @param name the raw name to normalize
 * @return the value produced by the helper chain described above
 */
private String toArticleName(String name) {
    final String trimmed = StringUtils.trim(name);
    // NOTE(review): presumably strips leading/trailing underscores - confirm against RegexUtils.trim.
    final String stripped = RegexUtils.trim(trimmed, "_");
    // Second trim pass, applied to whatever the "_" pass left behind.
    final String retrimmed = StringUtils.trim(stripped);
    // NOTE(review): ucfirst presumably upper-cases the first character - confirm.
    return StringUtils.ucfirst(retrimmed);
}
/**
 * Applies {@code StringUtils.ucfirst} to the text of every tagged word whose tags
 * report category {@code "n"}.
 *
 * <p>The words are modified in place via {@code Word.setWord}; words without tags
 * or with a different category are left untouched. The {@code xfact} argument is
 * not read by this implementation.
 *
 * @param words the words to process; modified in place
 * @param xfact unused here
 * @return the same {@code words} list instance that was passed in
 */
@Override
public List<Word> modifyWords(List<Word> words, XmlList xfact) {
    for (Word current : words) {
        // Untagged words cannot be classified, so leave them alone.
        if (!current.hasTags()) {
            continue;
        }
        // NOTE(review): category "n" presumably means noun - confirm against the tag set.
        if (!current.getTags().isCategory("n")) {
            continue;
        }
        final String capitalized = StringUtils.ucfirst(current.getWord());
        current.setWord(capitalized);
    }
    return words;
}
private String getShortDescription(String name) { LogUtils.i("Searching a short description for: \"" + name + "\""); final Iterable<String> html = FreehalFiles.createInstance("wikipedia", name).readLines(); List<String> desc = new ArrayList<String>(); final int limitLines = 3; for (String line : html) { // skip empty lines line = StringUtils.trim(line); if (line.length() == 0) continue; // skip disambiguation links if (langUtils.lineContainsDisambiguation(line)) continue; // skip images if (langUtils.lineContainsFile(line)) continue; // skip templates if (line.contains("[[Wikipedia:")) continue; // skip headings if (line.startsWith("==")) continue; // skip infoboxes if (line.contains("{{") || line.contains("}}") || (line.startsWith("|") && line.contains("="))) continue; // add sentences to description // is it the beginning of a list? if (line.endsWith(":") && !line.contains(".")) desc.add(line + "\n"); // or a list element else if (line.startsWith("*")) desc.add(line + "\n"); // or a normal paragraph... else if (desc.size() < limitLines) { for (String sentence : line.split("\\.( |$)")) { sentence = sentence.trim(); if (sentence.length() > 0 && desc.size() < limitLines) desc.add(sentence + ". "); } } else break; } if (desc.size() > 0) return toPlainText(StringUtils.join("", desc)); else return null; }