Пример #1
0
 /**
  * Builds the collator for the given output base from its format info and language directions.
  *
  * <p>The collator is created by a {@code BasicCollatorFactory} from the combined collation rules
  * of the writer's language-directions info, together with the strength and decomposition of its
  * default collation properties.
  *
  * @param writer output base whose format info and language directions are consulted
  * @return the created collator, or {@code null} when the writer's format info reports that
  *     sorting is not expected
  * @throws Exception if one of the writer accessors or the factory fails
  */
 protected static Collator createCollator(BaseWriter writer) throws Exception {
   // No sorting requested by the output format: callers treat null as "skip sorting".
   if (!writer.getFormatInfo().isSortingExpected()) {
     return null;
   }
   LanguageDirectionsInfo directions = writer.getLanguageDirectionsInfo();
   return new BasicCollatorFactory()
       .createCollator(
           directions.getCombinedCollationRules(),
           directions.getDefaultCollationProperties().getCollationStrength(),
           directions.getDefaultCollationProperties().getCollationDecomposition());
 }
Пример #2
0
  /**
   * Copies the abbreviations of the reader to the writer.
   *
   * <p>The abbreviation keys are sorted with the collator returned by {@code createCollator} when
   * one is available; otherwise they are processed in the iteration order of the key set. An entry
   * is logged and skipped when the reader has no info for its key, or when its key, abbreviation
   * or definition is {@code null} or blank. For every other entry the abbreviation and the
   * definition are trimmed and that same (trimmed) info object is saved. The writer is flushed
   * once all entries have been processed.
   *
   * @param reader input base
   * @param writer output base
   * @param observer receives the result of {@code progress.setMessage(...)} before the sorting and
   *     before the converting phase
   * @param progress progress holder whose message is updated for each phase
   * @throws Exception if reading, collator creation or writing fails
   */
  protected static void convertAbbreviations(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    Set<String> keys = reader.getAbbreviationKeys();
    if (keys == null) {
      log.info("No abbreviations available");
      return;
    }

    log.info("Number of abbreviations: {}", keys.size());

    // Update progress
    observer.update(progress.setMessage("Sorting Abbreviations"), null);

    // Copy the keys into a list so they can be sorted without touching the reader's set
    List<String> sortedKeys = new ArrayList<String>(keys);

    // Create Collator and sort abbreviations
    Collator collator = createCollator(writer);
    if (collator != null) {
      Collections.sort(sortedKeys, collator);
    } else {
      log.warn("Collator couldn't be created, sorting of Abbreviations is skipped");
    }

    // Update progress
    observer.update(progress.setMessage("Converting Abbreviations"), null);

    for (String key : sortedKeys) {
      AbbreviationInfo abbrInfo = reader.getAbbreviationInfo(key);
      if (abbrInfo == null) {
        // Nothing to validate or save for this key
        log.info("Abbreviation {} is excluded", key);
        continue;
      }

      String abbreviation = abbrInfo.getAbbreviation();
      String definition = abbrInfo.getDefinition();
      if (key == null
          || abbreviation == null
          || definition == null
          || key.trim().isEmpty()
          || abbreviation.trim().isEmpty()
          || definition.trim().isEmpty()) {
        log.info("Abbreviation {} is excluded", abbrInfo);
        continue;
      }

      abbrInfo.setAbbreviation(abbreviation.trim());
      abbrInfo.setDefinition(definition.trim());

      // Save the trimmed instance itself; re-reading it from the reader could drop the trimming
      writer.saveAbbreviationInfo(abbrInfo);
    }

    writer.flush();
  }
Пример #3
0
 /**
  * Saves one key-only article entry to the writer for every word of the reader.
  *
  * <p>A single {@code ArticleInfo} with an empty article text is reused for all words: the word
  * of its word info is replaced before each save.
  *
  * @param reader input base providing the words
  * @param writer output base receiving the entries
  * @throws Exception if reading or writing fails
  */
 protected static void convertArticlesKeys(BaseReader reader, BaseWriter writer) throws Exception {
   List<String> allWords = reader.getWords();
   // Reusable carrier; only the word changes between saves
   ArticleInfo keyOnlyArticle = new ArticleInfo(new WordInfo(""), "");
   for (Iterator<String> it = allWords.iterator(); it.hasNext(); ) {
     keyOnlyArticle.getWordInfo().setWord(it.next());
     writer.saveAdaptedArticleInfo(keyOnlyArticle);
   }
 }
Пример #4
0
 /**
  * Saves one key-only media resource entry to the writer for every media resource key of the
  * reader.
  *
  * <p>A single {@code MediaResourceInfo} (constructed with a {@code null} second argument) is
  * reused for all keys: the resource key is replaced before each save. Nothing is written when
  * the reader returns no key set.
  *
  * @param reader input base providing the media resource keys
  * @param writer output base receiving the entries
  * @throws Exception if reading or writing fails
  */
 protected static void convertMediaResourcesKeys(BaseReader reader, BaseWriter writer)
     throws Exception {
   Set<String> mediaKeys = reader.getMediaResourceKeys();
   if (mediaKeys == null) {
     // The reader may return null when there are no media resources (convertMediaResources
     // guards the same call); iterating it would fail with a NullPointerException.
     return;
   }
   // Reusable carrier; only the resource key changes between saves
   MediaResourceInfo resourceInfo = new MediaResourceInfo(new MediaResourceKey(""), null);
   for (String mkey : mediaKeys) {
     resourceInfo.getKey().setResourceKey(mkey);
     writer.saveMediaResourceInfo(resourceInfo);
   }
 }
Пример #5
0
 /**
  * Saves one key-only abbreviation entry to the writer for every abbreviation key of the reader.
  *
  * <p>A single {@code AbbreviationInfo} created with two empty strings is reused for all keys:
  * its key is replaced before each save. Nothing is written when the reader returns no key set.
  *
  * @param reader input base providing the abbreviation keys
  * @param writer output base receiving the entries
  * @throws Exception if reading or writing fails
  */
 protected static void convertAbbreviationsKeys(BaseReader reader, BaseWriter writer)
     throws Exception {
   Set<String> keys = reader.getAbbreviationKeys();
   if (keys == null) {
     // The reader may return null when there are no abbreviations (convertAbbreviations guards
     // the same call); iterating it would fail with a NullPointerException.
     return;
   }
   // Reusable carrier; only the key changes between saves
   AbbreviationInfo abbrevInfo = new AbbreviationInfo("", "");
   for (String key : keys) {
     abbrevInfo.setKey(key);
     writer.saveAbbreviationInfo(abbrevInfo);
   }
 }
Пример #6
0
 /**
  * Copies the automatic base resources from the reader to the writer.
  *
  * <p>Every {@code BaseResourceKey} constant flagged as automatic is looked up in the reader by
  * its key; resources the reader does not return are skipped.
  *
  * @param reader input base
  * @param writer output base
  * @throws Exception if reading or writing fails
  */
 protected static void convertBaseResources(BaseReader reader, BaseWriter writer)
     throws Exception {
   for (BaseResourceKey resourceKey : BaseResourceKey.values()) {
     if (!resourceKey.isAutomatic()) {
       continue;
     }
     BaseResourceInfo info = reader.getBaseResourceInfo(resourceKey.getKey());
     if (info == null) {
       // The input base has no such resource
       continue;
     }
     writer.saveBaseResourceInfo(info);
   }
 }
Пример #7
0
  /**
   * Copies the media resources of the reader to the writer.
   *
   * <p>The media resource keys are sorted with the collator returned by {@code createCollator}
   * when one is available; otherwise they are processed in the iteration order of the key set.
   * Each resource is then read by its key and saved. The writer is flushed once all resources
   * have been processed.
   *
   * @param reader input base
   * @param writer output base
   * @param observer receives the result of {@code progress.setMessage(...)} before the sorting and
   *     before the converting phase
   * @param progress progress holder whose message is updated for each phase
   * @throws IllegalStateException if the reader returns no resource for one of its own keys
   * @throws Exception if reading, collator creation or writing fails
   */
  protected static void convertMediaResources(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    Set<String> keys = reader.getMediaResourceKeys();
    if (keys == null) {
      log.info("No media resources available");
      return;
    }

    log.info("Number of media resources: {}", keys.size());

    // Update progress
    observer.update(progress.setMessage("Sorting Media Resources"), null);

    // Copy the keys into a list so they can be sorted without touching the reader's set
    List<String> sortedKeys = new ArrayList<String>(keys);

    // Create Collator and sort media resource keys
    Collator collator = createCollator(writer);
    if (collator != null) {
      Collections.sort(sortedKeys, collator);
    } else {
      log.warn("Collator couldn't be created, sorting of Media Resources is skipped");
    }

    // Update progress
    observer.update(progress.setMessage("Converting Media Resources"), null);

    for (String sortedKey : sortedKeys) {
      MediaResourceInfo mediaInfo = reader.getMediaResourceInfo(new MediaResourceKey(sortedKey));
      if (mediaInfo == null) {
        // Fail with a descriptive error instead of an opaque NullPointerException below
        log.warn("Couldn't find media resource for {}", sortedKey);
        throw new IllegalStateException("Couldn't find media resource for " + sortedKey);
      }
      // NOTE(review): this re-assigns the resource key to its current value; it is kept in case
      // setResourceKey has side effects that are not visible here - confirm and remove if not.
      mediaInfo.getKey().setResourceKey(mediaInfo.getKey().getResourceKey());
      writer.saveMediaResourceInfo(mediaInfo);
    }

    writer.flush();
  }
Пример #8
0
  /**
   * Converts the input base into the output base.
   *
   * <p>When the output format holds keys only, just the abbreviation, article and media resource
   * keys are written. Otherwise base resources, abbreviations, articles and media resources are
   * converted in that order, and progress is reported to the observer through a <code>
   * ProgressInfo</code> instance, which the submitted observer is expected to recognize.
   *
   * <p>Neither the reader nor the writer is closed here; closing them is left to the caller.
   *
   * @param reader - input base
   * @param writer - output base
   * @param observer - tracks the conversion progress
   * @throws Exception if any of the conversion steps fails
   */
  public static void convert(BaseReader reader, BaseWriter writer, Observer observer)
      throws Exception {

    ProgressInfo progress = new ProgressInfo();

    if (!writer.getFormatInfo().hasKeysOnly()) {
      // Full conversion: resources, abbreviations, articles and media
      convertBaseResources(reader, writer);
      convertAbbreviations(reader, writer, observer, progress);
      convertArticles(reader, writer, observer, progress);
      convertMediaResources(reader, writer, observer, progress);
      return;
    }

    // Simplified conversion: the output base contains keys only
    convertAbbreviationsKeys(reader, writer);
    convertArticlesKeys(reader, writer);
    convertMediaResourcesKeys(reader, writer);
  }
Пример #9
0
  /**
   * Converts the articles of the reader and saves them to the writer.
   *
   * <p>The reader's words, adapted word mappings and redirects are handed to a {@code
   * ConversionWordsMapper} together with the collator from {@code createCollator}. The mapper's
   * normalized word list then drives the conversion: the position of a word in that list becomes
   * its new id, and the article is fetched from the reader by the word's old id. Mappings and
   * redirects are carried over, except for redirects that would point to the word itself. The
   * writer is flushed once all articles have been saved.
   *
   * @param reader input base
   * @param writer output base
   * @param observer receives the result of {@code progress.setMessage(...)} before the mapping and
   *     before the converting phase
   * @param progress progress holder whose message is updated for each phase
   * @throws IllegalStateException if the reader has no article (or no article text) for a word
   * @throws Exception if reading, collator creation, mapping or writing fails
   */
  protected static void convertArticles(
      BaseReader reader, BaseWriter writer, Observer observer, ProgressInfo progress)
      throws Exception {

    log.info("Started converting articles: Size {}", reader.getWords().size());

    // Report the first phase
    observer.update(progress.setMessage("Mapping and Sorting Articles"), null);

    // Private copy of the words to speed up the process (the reader's list can be buffered)
    List<String> sourceWords = new ArrayList<String>(reader.getWords());
    Map<Integer, String> sourceMappings = reader.getAdaptedWordsMappings();
    Map<Integer, Integer> sourceRedirects = reader.getWordsRedirects();

    // Collator for the mapper (null when the output format expects no sorting)
    Collator collator = createCollator(writer);

    ConversionWordsMapper mapper =
        new ConversionWordsMapper(collator, sourceWords, sourceMappings, sourceRedirects);
    mapper.init();

    List<String> normalized = mapper.getNormalizedWords();

    // Report the second phase
    observer.update(progress.setMessage("Converting Articles"), null);

    for (int newId = 0; newId < normalized.size(); newId++) {
      String word = normalized.get(newId);
      int oldId = mapper.getOldWordId(word);

      // A fresh lookup object per word: per the original author's note it is overridden every
      // time an article is retrieved, so it cannot be shared across iterations.
      WordInfo lookup = new WordInfo(oldId);
      lookup.setId(oldId);

      ArticleInfo source = reader.getAdaptedArticleInfo(lookup);
      if (source == null || source.getArticle() == null) {
        log.warn("Couldn't find article for {}", lookup);
        throw new IllegalStateException("Couldn't find article for " + lookup);
      }

      WordInfo target = new WordInfo(newId, word);

      // Carry over the mapping, if the word has one
      String mapping = mapper.getNewWordMappingByOldWordId(oldId);
      if (mapping != null) {
        target.setWordMapping(mapping);
      }

      // Carry over the redirect, unless it is absent or points to the word itself
      int redirectTo = mapper.getNewWordRedirect(oldId);
      boolean redirectsElsewhere = redirectTo >= 0 && redirectTo != newId;
      if (redirectsElsewhere) {
        target.setRedirectToId(redirectTo);
      }

      writer.saveAdaptedArticleInfo(new ArticleInfo(target, source.getArticle()));
    }

    writer.flush();
  }