public static void main(String[] args) throws WikiApiException { // db connection settings DatabaseConfiguration dbConfig = new DatabaseConfiguration(); dbConfig.setDatabase("DATABASE"); dbConfig.setHost("HOST"); dbConfig.setUser("USER"); dbConfig.setPassword("PASSWORD"); dbConfig.setLanguage(Language.english); // initialize a wiki Wikipedia wiki = new Wikipedia(dbConfig); // get the page 'Dog' Page p = wiki.getPage("Dog"); // get a ParsedPage object MediaWikiParserFactory pf = new MediaWikiParserFactory(); pf.setTemplateParserClass(FlushTemplates.class); // Filtering TEMPLATE-Elements String IMAGE = "Image"; // Replace it with the image template name in your Wiki language edition, // e.g. "Image" in English // filtering Image-Elements pf.getImageIdentifers().add(IMAGE); // parse page text MediaWikiParser parser = pf.createParser(); ParsedPage pp = parser.parse(p.getText()); System.out.println(pp.getText()); }
/**
 * Creates the vocabulary related to the given category. This vocabulary is composed of the
 * terms that appear in the category's articles and their frequencies.
 *
 * <p>The text of every article returned by {@code category.getArticles()} is obtained via
 * {@code wiki.getParsedArticle(...)} and added to the vocabulary.
 *
 * @param category The category; validated with {@code CHK.CHECK_NOT_NULL}.
 * @return The vocabulary related to the category.
 * @throws WikiApiException If accessing the category's articles or their parsed text fails.
 */
public DomainVocabulary createCategoryVocabulary(Category category) throws WikiApiException {
    CHK.CHECK_NOT_NULL(category);

    Locale language = new Locale(wiki.getLanguage().name());
    DomainVocabulary vocabulary = new DomainVocabulary(language);

    // Iterate the returned collection directly; downcasting it to HashSet would fail with a
    // ClassCastException if the API ever returned a different collection implementation.
    for (Page page : category.getArticles()) {
        String text = wiki.getParsedArticle(page.getPageId()).getText();
        vocabulary.addTerms(text);
    }
    return vocabulary;
}
// TODO Use SWEBLE @Override protected String getPlainDocumentText(Page page) { ParsedPage pp = parser.parse(page.getText()); if (pp != null) { return pp.getText(); } else { return ""; } }
public static void main(String[] args) throws WikiApiException { // configure the database connection parameters DatabaseConfiguration dbConfig = new DatabaseConfiguration(); dbConfig.setHost("SERVER_URL"); dbConfig.setDatabase("DATABASE"); dbConfig.setUser("USER"); dbConfig.setPassword("PASSWORD"); dbConfig.setLanguage(Language.german); // Create a new German wikipedia. Wikipedia wiki = new Wikipedia(dbConfig); // Get the category "Towns in Germany" String title = "Towns in Germany"; Category topCat; try { topCat = wiki.getCategory(title); } catch (WikiPageNotFoundException e) { throw new WikiApiException("Category " + title + " does not exist"); } // Add the pages categorized under "Towns in Germany". Set<String> towns = new TreeSet<String>(); for (Page p : topCat.getArticles()) { towns.add(p.getTitle().getPlainTitle()); } // Get the pages categorized under each subcategory of "Towns in Germany". for (Category townCategory : topCat.getDescendants()) { for (Page p : townCategory.getArticles()) { towns.add(p.getTitle().getPlainTitle()); } System.out.println("Number of towns: " + towns.size()); } // Output the pages for (String town : towns) { System.out.println(town); } }
/**
 * Decides whether the given page is valid: a page is valid exactly when
 * {@code page.isDiscussion()} returns {@code true}.
 *
 * @param page The page to check.
 * @return The result of {@code page.isDiscussion()}.
 * @throws WikiTitleParsingException Declared by this method's signature.
 */
@Override
protected boolean isValidPage(Page page) throws WikiTitleParsingException {
    final boolean discussionPage = page.isDiscussion();
    return discussionPage;
}