/**
 * Saves one entry per word supplied by {@code reader}, varying only the word between saves.
 *
 * <p>A single {@link ArticleInfo} is created up front from a {@link WordInfo} holding an
 * empty string and an empty-string second constructor argument. That same instance is
 * handed to the writer for every word; only its word is replaced before each save.
 *
 * @param reader source of the word list ({@code getWords()})
 * @param writer receiver of each entry ({@code saveAdaptedArticleInfo(...)})
 * @throws Exception declared by the signature; presumably raised by the reader or the
 *     writer (not visible from this method)
 */
protected static void convertArticlesKeys(BaseReader reader, BaseWriter writer) throws Exception {
    List<String> allWords = reader.getWords();

    // One shared holder for the whole run: it is mutated in place rather than
    // re-created for every word.
    WordInfo blankWordInfo = new WordInfo("");
    ArticleInfo keyOnlyEntry = new ArticleInfo(blankWordInfo, "");

    for (String currentWord : allWords) {
        keyOnlyEntry.getWordInfo().setWord(currentWord);
        writer.saveAdaptedArticleInfo(keyOnlyEntry);
    }
}
protected static void convertArticles( BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress) throws Exception { log.info("Started converting articles: Size {}", reader.getWords().size()); // Update progress observer.update(progress.setMessage("Mapping and Sorting Articles"), null); // Clone words to speed up the process (it can be based on buffered list) List<String> srcWords = new ArrayList<String>(reader.getWords()); // Mappings and redirects Map<Integer, String> srcMappings = reader.getAdaptedWordsMappings(); Map<Integer, Integer> srcRedirects = reader.getWordsRedirects(); // Create Collator and sort articles Collator collator = createCollator(writer); // Create and initialize words mapper ConversionWordsMapper wordsMapper = new ConversionWordsMapper(collator, srcWords, srcMappings, srcRedirects); wordsMapper.init(); List<String> normWords = wordsMapper.getNormalizedWords(); // Update progress observer.update(progress.setMessage("Converting Articles"), null); for (int i = 0; i < normWords.size(); i++) { String curWord = normWords.get(i); int oldWordId = wordsMapper.getOldWordId(curWord); // Word info to retrieve articles. Can't be moved outside because // it' overridden every time article is retrieved. 
WordInfo oldWordInfo = new WordInfo(oldWordId); oldWordInfo.setId(oldWordId); ArticleInfo inArticleInfo = reader.getAdaptedArticleInfo(oldWordInfo); if (inArticleInfo == null || inArticleInfo.getArticle() == null) { log.warn("Couldn't find article for {}", oldWordInfo); throw new IllegalStateException("Couldn't find article for " + oldWordInfo); } WordInfo newWordInfo = new WordInfo(i, curWord); // Add mapping if any String newMapping = wordsMapper.getNewWordMappingByOldWordId(oldWordId); if (newMapping != null) { newWordInfo.setWordMapping(newMapping); } // Add redirect if any int newRedirectToId = wordsMapper.getNewWordRedirect(oldWordId); if (newRedirectToId >= 0 && newRedirectToId != i) { // Don't allow redirects to itself newWordInfo.setRedirectToId(newRedirectToId); } ArticleInfo outArticleInfo = new ArticleInfo(newWordInfo, inArticleInfo.getArticle()); writer.saveAdaptedArticleInfo(outArticleInfo); // if (i > 10000) break; } writer.flush(); }