/**
 * Tells whether a category is part of the domain described by a vocabulary.
 *
 * <p>The plain title of the category is preprocessed by the vocabulary and the resulting terms
 * are looked up one at a time. The category is considered part of the domain as soon as one of
 * those terms is contained in the vocabulary; remaining terms are not inspected.
 *
 * @param category The category to check. It is validated with {@code CHK.CHECK_NOT_NULL}.
 * @param vocabulary The vocabulary of the desired domain.
 * @return {@code true} if at least one term of the title is contained in the vocabulary.
 *     Otherwise, {@code false}.
 * @throws WikiTitleParsingException Propagated from the calls made here; presumably raised when
 *     the title of the category cannot be parsed.
 */
public boolean isDomain(Category category, DomainVocabulary vocabulary)
    throws WikiTitleParsingException {
  CHK.CHECK_NOT_NULL(category);

  String plainTitle = category.getTitle().getPlainTitle();
  Iterator<String> terms = vocabulary.preprocess(plainTitle).iterator();

  // Stop at the first term found in the vocabulary.
  while (terms.hasNext()) {
    if (vocabulary.contains(terms.next())) {
      return true;
    }
  }
  return false;
}
public static void main(String args[]) throws IOException { String VOC_OUT_FILE = "%s_%s_%s.vocabulary"; CommandLine cLine = parseArguments(args); Locale language = new Locale(cLine.getOptionValue("l")); int year = Integer.valueOf(cLine.getOptionValue("y")); CategoryExplorer explorer = new CategoryExplorer(language, year); Category cat = null; DomainVocabulary vocabulary = new DomainVocabulary(explorer.getLocale()); ArrayList<GroupOfCategories> groupsList = null; int step = 1; if (cLine.hasOption("c") && cLine.hasOption("t") && cLine.hasOption("s") && cLine.hasOption("v")) { step = 2; String category = cLine.getOptionValue("c"); cat = explorer.loadCategory(Integer.parseInt(category)); File input = new File(cLine.getOptionValue("v")); vocabulary.insertFromFile(input); } if (cLine.hasOption("k") && cLine.hasOption("s")) { step = 3; // TODO init groupsList with k file } switch (step) { case 1: // Step 1: Create domain vocabulary logger.info("Step 1."); String category = cLine.getOptionValue("c"); cat = explorer.loadCategory(Integer.parseInt(category)); if (cat == null) { // TODO } try { vocabulary = explorer.createCategoryVocabulary(cat); } catch (WikiApiException e) { logger.errorEnd("Exception during Wikipedia DB access:" + e.getLocalizedMessage()); e.printStackTrace(); } String filename = String.format(VOC_OUT_FILE, category, language, year); File vocabularyFile = new File(System.getProperty("user.dir"), filename); vocabulary.toFile(vocabularyFile); case 2: // Step 2: Explore Wikipedia categories with the most frequent terms // of the vocabulary logger.info("Step 2."); String top = cLine.getOptionValue("t"); vocabulary = vocabulary.getTop(Float.parseFloat(top)); groupsList = explorer.scoreDomain(vocabulary, cat); // TODO Save groups case 3: // Extract the categories which belong to the domain logger.info("Step 3."); String score = cLine.getOptionValue("s"); HashSet<Category> domain = (HashSet<Category>) explorer.getDomainCategories(groupsList, 
Double.parseDouble(score)); for (Category domainCat : domain) { try { System.out.println( domainCat.getPageId() + "\t" + domainCat.getTitle().getWikiStyleTitle()); } catch (WikiTitleParsingException e) { logger.error("The category has no title assigned" + e.getLocalizedMessage()); e.printStackTrace(); } } } logger.info("End of the process."); }