Пример #1
0
  /**
   * Copies the abbreviations exposed by {@code reader} into {@code writer}.
   *
   * <p>The abbreviation keys are sorted with the collator returned by {@code createCollator(writer)}
   * when one is available; otherwise they are processed in the order the key set yields them.
   * Entries whose key, abbreviation or definition is missing or blank are logged and skipped. The
   * abbreviation and definition of every remaining entry are trimmed before the entry is saved.
   * The writer is flushed once all entries have been processed.
   *
   * @param reader source of the abbreviation keys and entries
   * @param writer destination the trimmed entries are saved to
   * @param observer notified when the sorting and the converting phases start
   * @param progress progress holder whose message is updated for each phase
   * @throws Exception propagated from the reader, writer or observer
   */
  protected static void convertAbbreviations(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    Set<String> keys = reader.getAbbreviationKeys();
    if (keys == null) {
      log.info("No abbreviations available");
      return;
    }

    log.info("Number of abbreviations: {}", keys.size());

    // Update progress
    observer.update(progress.setMessage("Sorting Abbreviations"), null);

    List<String> sortedKeys = new ArrayList<String>(keys);

    // Create Collator and sort abbreviations
    Collator collator = createCollator(writer);
    if (collator != null) {
      Collections.sort(sortedKeys, collator);
    } else {
      log.warn("Collator couldn't be created, sorting of Abbreviations is skipped");
    }

    // Update progress
    observer.update(progress.setMessage("Converting Abbreviations"), null);

    for (String abbr : sortedKeys) {
      // Validate the key before it is handed to the reader.
      if (abbr == null || abbr.trim().isEmpty()) {
        log.info("Abbreviation {} is excluded", abbr);
        continue;
      }

      AbbreviationInfo abbrInfo = reader.getAbbreviationInfo(abbr);
      if (abbrInfo == null) {
        // Nothing to convert for this key; skip it instead of failing on a null dereference.
        log.info("Abbreviation {} is excluded", abbr);
        continue;
      }

      String abbreviation = abbrInfo.getAbbreviation();
      String definition = abbrInfo.getDefinition();
      if (abbreviation == null || definition == null) {
        log.info("Abbreviation {} is excluded", abbrInfo);
        continue;
      }

      String trimmedAbbreviation = abbreviation.trim();
      String trimmedDefinition = definition.trim();
      if (trimmedAbbreviation.isEmpty() || trimmedDefinition.isEmpty()) {
        log.info("Abbreviation {} is excluded", abbrInfo);
        continue;
      }

      abbrInfo.setAbbreviation(trimmedAbbreviation);
      abbrInfo.setDefinition(trimmedDefinition);

      // Save the instance that was just trimmed; fetching it from the reader again could
      // hand back an untrimmed copy.
      writer.saveAbbreviationInfo(abbrInfo);
    }

    writer.flush();
  }
Пример #2
0
  /**
   * Copies the media resources exposed by {@code reader} into {@code writer}.
   *
   * <p>The resource keys are sorted with the collator returned by {@code createCollator(writer)}
   * when one is available; otherwise they are processed in the order the key set yields them.
   * Each resource is looked up by its key and saved to the writer, which is flushed at the end.
   *
   * @param reader source of the media resource keys and entries
   * @param writer destination the media resources are saved to
   * @param observer notified when the sorting and the converting phases start
   * @param progress progress holder whose message is updated for each phase
   * @throws Exception propagated from the reader, writer or observer
   */
  protected static void convertMediaResources(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    Set<String> resourceKeys = reader.getMediaResourceKeys();
    if (resourceKeys == null) {
      log.info("No media resources available");
      return;
    }

    log.info("Number of media resources: {}", resourceKeys.size());

    // Report the sorting phase
    observer.update(progress.setMessage("Sorting Media Resources"), null);

    List<String> orderedKeys = new ArrayList<String>(resourceKeys);

    // Sort the media resource keys when a collator is available
    Collator keyCollator = createCollator(writer);
    if (keyCollator == null) {
      log.warn("Collator couldn't be created, sorting of Media Resources is skipped");
    } else {
      Collections.sort(orderedKeys, keyCollator);
    }

    // Report the converting phase
    observer.update(progress.setMessage("Converting Media Resources"), null);

    for (String resourceKey : orderedKeys) {
      MediaResourceInfo resource =
          reader.getMediaResourceInfo(new MediaResourceKey(resourceKey));

      // NOTE(review): this writes the key's own value back into it, which looks like a no-op
      // unless the setter has side effects - confirm the intended transformation.
      resource.getKey().setResourceKey(resource.getKey().getResourceKey());

      writer.saveMediaResourceInfo(resource);
    }

    writer.flush();
  }
Пример #3
0
  /**
   * Converts all articles of {@code reader} and saves them to {@code writer}.
   *
   * <p>A {@code ConversionWordsMapper} built from the reader's words, mappings and redirects
   * supplies the normalized word list. Each normalized word is saved with its position in that
   * list as the new word id, together with its mapping and redirect when present. The writer is
   * flushed once all articles have been saved.
   *
   * @param reader source of the words, mappings, redirects and articles
   * @param writer destination the converted articles are saved to
   * @param observer notified when the mapping and the converting phases start
   * @param progress progress holder whose message is updated for each phase
   * @throws IllegalStateException if no article can be retrieved for a word
   * @throws Exception propagated from the reader, writer or observer
   */
  protected static void convertArticles(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    log.info("Started converting articles: Size {}", reader.getWords().size());

    // Report the mapping/sorting phase
    observer.update(progress.setMessage("Mapping and Sorting Articles"), null);

    // Work on a private copy of the words; the reader's list may be backed by a buffer,
    // and copying it is meant to speed up the processing below.
    List<String> sourceWords = new ArrayList<String>(reader.getWords());

    // Mappings and redirects of the source words
    Map<Integer, String> sourceMappings = reader.getAdaptedWordsMappings();
    Map<Integer, Integer> sourceRedirects = reader.getWordsRedirects();

    // Collator handed to the words mapper
    Collator collator = createCollator(writer);

    // Build and initialize the words mapper
    ConversionWordsMapper mapper =
        new ConversionWordsMapper(collator, sourceWords, sourceMappings, sourceRedirects);
    mapper.init();

    List<String> normalizedWords = mapper.getNormalizedWords();

    // Report the converting phase
    observer.update(progress.setMessage("Converting Articles"), null);

    for (int newId = 0; newId < normalizedWords.size(); newId++) {
      String word = normalizedWords.get(newId);
      int oldId = mapper.getOldWordId(word);

      // A fresh lookup object is needed on every iteration: per the original note it is
      // overridden each time an article is retrieved, so it cannot be hoisted out of the loop.
      WordInfo lookupInfo = new WordInfo(oldId);
      lookupInfo.setId(oldId);

      ArticleInfo sourceArticle = reader.getAdaptedArticleInfo(lookupInfo);
      boolean articleMissing = sourceArticle == null || sourceArticle.getArticle() == null;
      if (articleMissing) {
        log.warn("Couldn't find article for {}", lookupInfo);
        throw new IllegalStateException("Couldn't find article for " + lookupInfo);
      }

      WordInfo targetInfo = new WordInfo(newId, word);

      // Carry over the word mapping, if there is one
      String mapping = mapper.getNewWordMappingByOldWordId(oldId);
      if (mapping != null) {
        targetInfo.setWordMapping(mapping);
      }

      // Carry over the redirect, unless it is absent (negative) or points at the word itself
      int redirectTarget = mapper.getNewWordRedirect(oldId);
      boolean absentOrSelfRedirect = redirectTarget < 0 || redirectTarget == newId;
      if (!absentOrSelfRedirect) {
        targetInfo.setRedirectToId(redirectTarget);
      }

      writer.saveAdaptedArticleInfo(new ArticleInfo(targetInfo, sourceArticle.getArticle()));
    }

    writer.flush();
  }