コード例 #1
0
 /**
  * Saves one entry through {@code writer} for every word returned by
  * {@code reader.getWords()}, without reading any article from the reader.
  *
  * <p>A single {@link ArticleInfo} instance is reused for all words: before each save only
  * the word of its {@link WordInfo} is replaced. The instance is created with an empty
  * string as its second constructor argument (presumably the article body - confirm against
  * {@code ArticleInfo}).
  *
  * @param reader source of the word list
  * @param writer destination that receives one save call per word
  * @throws Exception declared by the signature and propagated unchanged from the reader or
  *     writer calls
  */
 protected static void convertArticlesKeys(BaseReader reader, BaseWriter writer) throws Exception {
   List<String> keys = reader.getWords();

   // Shared holder: mutated in place for each key instead of allocating a new one per word.
   WordInfo blankWord = new WordInfo("");
   ArticleInfo holder = new ArticleInfo(blankWord, "");

   java.util.Iterator<String> cursor = keys.iterator();
   while (cursor.hasNext()) {
     String key = cursor.next();
     holder.getWordInfo().setWord(key);
     writer.saveAdaptedArticleInfo(holder);
   }
 }
コード例 #2
0
  /**
   * Copies every article from {@code reader} to {@code writer}, re-keyed by its position in
   * the normalized word list produced by {@link ConversionWordsMapper}.
   *
   * <p>For each normalized word the method looks up the word's old id, reads the article for
   * that id from the reader, and saves it under a new {@link WordInfo} whose id is the word's
   * index in the normalized list. A word mapping and a redirect target are carried over when
   * the mapper supplies them; a redirect that would point at the word itself is dropped.
   * The observer is notified twice with a progress message (before mapping and before the
   * conversion loop), and the writer is flushed once after the last article.
   *
   * @param reader source of words, mappings, redirects and articles
   * @param writer destination of the converted articles; also passed to {@code createCollator}
   * @param observer receives the progress updates
   * @param progress progress object whose message is set before each observer update
   * @throws IllegalStateException if the reader returns no article info, or an article info
   *     without an article, for one of the words
   * @throws Exception declared by the signature and propagated unchanged from the
   *     collaborators
   */
  protected static void convertArticles(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    log.info("Started converting articles: Size {}", reader.getWords().size());

    observer.update(progress.setMessage("Mapping and Sorting Articles"), null);

    // Work on a private copy of the word list (the reader's list may be a buffered
    // implementation, per the original author's note), plus the reader's mappings/redirects.
    List<String> sourceWords = new ArrayList<String>(reader.getWords());
    Map<Integer, String> sourceMappings = reader.getAdaptedWordsMappings();
    Map<Integer, Integer> sourceRedirects = reader.getWordsRedirects();

    // The mapper receives a collator built for this writer and must be initialized
    // before it is queried.
    ConversionWordsMapper mapper =
        new ConversionWordsMapper(
            createCollator(writer), sourceWords, sourceMappings, sourceRedirects);
    mapper.init();
    List<String> orderedWords = mapper.getNormalizedWords();

    observer.update(progress.setMessage("Converting Articles"), null);

    for (int newId = 0; newId < orderedWords.size(); newId++) {
      String word = orderedWords.get(newId);
      int sourceId = mapper.getOldWordId(word);

      // A fresh lookup key is required on every pass: it is overridden each time an
      // article is retrieved, so it cannot be hoisted out of the loop.
      WordInfo lookupKey = new WordInfo(sourceId);
      lookupKey.setId(sourceId);

      ArticleInfo sourceArticle = reader.getAdaptedArticleInfo(lookupKey);
      boolean articleFound = sourceArticle != null && sourceArticle.getArticle() != null;
      if (!articleFound) {
        log.warn("Couldn't find article for {}", lookupKey);
        throw new IllegalStateException("Couldn't find article for " + lookupKey);
      }

      WordInfo targetWord = new WordInfo(newId, word);

      // Optional word mapping.
      String mapping = mapper.getNewWordMappingByOldWordId(sourceId);
      if (mapping != null) {
        targetWord.setWordMapping(mapping);
      }

      // Optional redirect; negative means "none", and self-redirects are not allowed.
      int redirectTarget = mapper.getNewWordRedirect(sourceId);
      boolean redirectsElsewhere = redirectTarget >= 0 && redirectTarget != newId;
      if (redirectsElsewhere) {
        targetWord.setRedirectToId(redirectTarget);
      }

      writer.saveAdaptedArticleInfo(new ArticleInfo(targetWord, sourceArticle.getArticle()));
    }

    writer.flush();
  }