public static void main(String[] args) throws WikiApiException {

    // db connection settings
    DatabaseConfiguration dbConfig = new DatabaseConfiguration();
    dbConfig.setDatabase("DATABASE");
    dbConfig.setHost("HOST");
    dbConfig.setUser("USER");
    dbConfig.setPassword("PASSWORD");
    dbConfig.setLanguage(Language.english);

    // initialize a wiki
    Wikipedia wiki = new Wikipedia(dbConfig);

    // get the page 'Dog'
    Page p = wiki.getPage("Dog");

    // get a ParsedPage object
    MediaWikiParserFactory pf = new MediaWikiParserFactory();
    pf.setTemplateParserClass(FlushTemplates.class); // Filtering TEMPLATE-Elements

    String IMAGE =
        "Image"; // Replace it with the image template name in your Wiki language edition,
    // e.g. "Image" in English

    // filtering Image-Elements
    pf.getImageIdentifers().add(IMAGE);

    // parse page text
    MediaWikiParser parser = pf.createParser();
    ParsedPage pp = parser.parse(p.getText());

    System.out.println(pp.getText());
  }
  /**
   * Creates the vocabulary related to the given category. This vocabulary is composed by the terms
   * that appears in the category articles and its frequency.
   *
   * @param category The category.
   * @return The vocabulary related to the category.
   * @throws WikiApiException
   */
  public DomainVocabulary createCategoryVocabulary(Category category) throws WikiApiException {
    CHK.CHECK_NOT_NULL(category);
    Locale language = new Locale(wiki.getLanguage().name());
    DomainVocabulary vocabulary = new DomainVocabulary(language);
    HashSet<Page> pages = null;
    pages = (HashSet<Page>) category.getArticles();
    for (Page page : pages) {
      String text = wiki.getParsedArticle(page.getPageId()).getText();
      vocabulary.addTerms(text);
    }

    return vocabulary;
  }
  // TODO Use SWEBLE
  @Override
  protected String getPlainDocumentText(Page page) {
    ParsedPage pp = parser.parse(page.getText());

    if (pp != null) {
      return pp.getText();
    } else {
      return "";
    }
  }
Example #4
0
  public static void main(String[] args) throws WikiApiException {

    // configure the database connection parameters
    DatabaseConfiguration dbConfig = new DatabaseConfiguration();
    dbConfig.setHost("SERVER_URL");
    dbConfig.setDatabase("DATABASE");
    dbConfig.setUser("USER");
    dbConfig.setPassword("PASSWORD");
    dbConfig.setLanguage(Language.german);

    // Create a new German wikipedia.
    Wikipedia wiki = new Wikipedia(dbConfig);

    // Get the category "Towns in Germany"
    String title = "Towns in Germany";
    Category topCat;
    try {
      topCat = wiki.getCategory(title);
    } catch (WikiPageNotFoundException e) {
      throw new WikiApiException("Category " + title + " does not exist");
    }

    // Add the pages categorized under "Towns in Germany".
    Set<String> towns = new TreeSet<String>();
    for (Page p : topCat.getArticles()) {
      towns.add(p.getTitle().getPlainTitle());
    }

    // Get the pages categorized under each subcategory of "Towns in Germany".
    for (Category townCategory : topCat.getDescendants()) {
      for (Page p : townCategory.getArticles()) {
        towns.add(p.getTitle().getPlainTitle());
      }
      System.out.println("Number of towns: " + towns.size());
    }

    // Output the pages
    for (String town : towns) {
      System.out.println(town);
    }
  }
 @Override
 protected boolean isValidPage(Page page) throws WikiTitleParsingException {
   return page.isDiscussion();
 }