/**
 * Blanks out wiki section headers (text wrapped in two or more '=' on each side) and
 * records their titles.
 *
 * Each header is replaced by whatever getSpaceString returns for the header's length
 * (presumably a run of spaces, so character offsets stay aligned; confirm against that
 * helper). Titles are appended to context, each preceded by a newline, unless the
 * trimmed title is "see also", "external links", "references" or "further reading"
 * (compared case-insensitively).
 *
 * @param markup  the wiki markup to scan
 * @param context buffer that receives the retained section titles
 * @return the markup with every matched header replaced
 */
private String blankSectionHeaders(String markup, StringBuffer context) {
    Matcher headers = Pattern.compile("(={2,})([^=]+)\\1").matcher(markup);
    StringBuilder blanked = new StringBuilder();
    int copiedUpTo = 0;

    while (headers.find()) {
        // Text between the previous header and this one is kept verbatim.
        blanked.append(markup, copiedUpTo, headers.start());
        // The header itself is swapped for a filler of the same length.
        blanked.append(getSpaceString(headers.end() - headers.start()));
        copiedUpTo = headers.end();

        String title = headers.group(2).trim();
        boolean boilerplate = title.equalsIgnoreCase("see also")
                || title.equalsIgnoreCase("external links")
                || title.equalsIgnoreCase("references")
                || title.equalsIgnoreCase("further reading");
        if (!boilerplate) {
            context.append("\n").append(title);
        }
    }

    // Tail after the last header (or the whole input when none matched).
    blanked.append(markup, copiedUpTo, markup.length());
    return blanked.toString();
}
private String blankLinks(String markup, StringBuffer context, HashSet<Integer> bannedTopics) { List<Integer> linkStack = new ArrayList<Integer>(); Pattern p = Pattern.compile("(\\[\\[|\\]\\])"); Matcher m = p.matcher(markup); StringBuilder sb = new StringBuilder(); int lastIndex = 0; while (m.find()) { String tag = markup.substring(m.start(), m.end()); if (tag.equals("[[")) linkStack.add(m.start()); else { if (!linkStack.isEmpty()) { int linkStart = linkStack.size() - 1; linkStack.remove(linkStack.size() - 1); if (linkStack.isEmpty()) { sb.append(markup.substring(lastIndex, linkStart)); // we have the whole link, possibly with other links nested inside. for (int i = linkStart; i < m.end(); i++) sb.append(" "); processLink(markup.substring(linkStart + 2, m.start()), context, bannedTopics); lastIndex = m.end(); } } } } if (!linkStack.isEmpty()) { System.err.println( "WikiPreprocessor| Warning: links were not well formed, so we cannot guarantee that they were stripped out correctly. "); } sb.append(markup.substring(lastIndex)); return sb.toString(); }
/**
 * Blanks out every match of regex in text, keeping only the first character of each
 * match; the remaining characters of the match are replaced by spaces so that the
 * result has the same length as the input.
 *
 * The pattern is compiled with CASE_INSENSITIVE and DOTALL. Zero-length matches are
 * skipped: they contain no character to retain and nothing to blank.
 *
 * @param regex regular expression describing the spans to clear
 * @param text  the text to process
 * @return text with each non-empty match reduced to its first character plus padding
 */
private String clearAllMentionsRetainFirstCharacter(String regex, String text) {
    Pattern p = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    Matcher m = p.matcher(text);
    int lastPos = 0;
    StringBuilder sb = new StringBuilder();

    while (m.find()) {
        if (m.end() == m.start()) {
            // An empty match would duplicate the character at this position, or read
            // past the end of the text when it occurs at text.length().
            continue;
        }
        sb.append(text, lastPos, m.start());
        sb.append(text.charAt(m.start()));
        for (int i = m.start() + 1; i < m.end(); i++) {
            sb.append(' ');
        }
        lastPos = m.end();
    }

    sb.append(text, lastPos, text.length());
    return sb.toString();
}
private String blankTemplates(String markup) { List<Integer> templateStack = new ArrayList<Integer>(); Pattern p = Pattern.compile("(\\{\\{|\\}\\})"); Matcher m = p.matcher(markup); StringBuilder sb = new StringBuilder(); int lastIndex = 0; while (m.find()) { String tag = markup.substring(m.start(), m.end()); if (tag.equals("{{")) templateStack.add(m.start()); else { if (!templateStack.isEmpty()) { int templateStart = templateStack.size() - 1; templateStack.remove(templateStack.size() - 1); if (templateStack.isEmpty()) { sb.append(markup.substring(lastIndex, templateStart)); // we have the whole template, with other templates nested inside for (int i = templateStart; i < m.end(); i++) sb.append(" "); lastIndex = m.end(); } } } } if (!templateStack.isEmpty()) System.err.println( "WikiPreprocessor | Warning: templates were not well formed, so we cannot guarantee that they were stripped out correctly. "); sb.append(markup.substring(lastIndex)); return sb.toString(); }