Ejemplo n.º 1
0
 /**
  * Builds a new list holding the entries of {@code suggestions}, each tagged with a debug string.
  *
  * <p>The first entry (treated as the typed word) is tagged with {@code "+"}. Every later entry
  * is tagged with its raw score, followed by the normalized score in parentheses when that
  * normalized score is positive. Entries are not copied: their debug strings are set in place
  * and the same objects are added to the returned list.
  *
  * @param typedWord the word handed to {@code BinaryDictionary.calcNormalizedScore} together
  *     with each candidate
  * @param suggestions the entries to annotate; element 0 is read unconditionally, so the list
  *     must not be empty
  * @return a new list with the same elements in the same order
  */
 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
     final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
   final SuggestedWordInfo head = suggestions.get(0);
   head.setDebugString("+");
   final int count = suggestions.size();
   final ArrayList<SuggestedWordInfo> annotated = CollectionUtils.newArrayList(count);
   annotated.add(head);
   // Index 0 was handled above; the remaining entries get score-based debug strings.
   for (int index = 1; index < count; ++index) {
     final SuggestedWordInfo candidate = suggestions.get(index);
     final float normalized =
         BinaryDictionary.calcNormalizedScore(typedWord, candidate.toString(), candidate.mScore);
     candidate.setDebugString(
         normalized > 0
             ? String.format(Locale.ROOT, "%d (%4.2f)", candidate.mScore, normalized)
             : Integer.toString(candidate.mScore));
     annotated.add(candidate);
   }
   return annotated;
 }
 /**
  * Writes a dictionary built from {@code sWords} and {@code sShortcuts} to a file using
  * {@code BinaryDictUtils.VERSION2_OPTIONS}, opens that file as a {@link BinaryDictionary}, and
  * checks that {@code getWordProperty} reports, for every word, the word itself, the
  * {@code UNIGRAM_FREQ} probability and exactly the expected shortcut targets.
  */
 public void testVer2DictGetWordProperty() {
   final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
   final ArrayList<String> words = sWords;
   final HashMap<String, List<String>> shortcuts = sShortcuts;
   final String dictName = "testGetWordProperty";
   final String dictVersion = Long.toString(System.currentTimeMillis());
   final FusionDictionary dict =
       new FusionDictionary(
           new PtNodeArray(),
           BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
   addUnigrams(words.size(), dict, words, shortcuts);
   addBigrams(dict, words, sEmptyBigrams);
   final File file =
       BinaryDictUtils.getDictFile(
           dictName, dictVersion, formatOptions, getContext().getCacheDir());
   // Drop any file left at this path before writing the dictionary.
   file.delete();
   timeWritingDictToFile(file, dict, formatOptions);
   final BinaryDictionary binaryDictionary =
       new BinaryDictionary(
           file.getAbsolutePath(),
           0 /* offset */,
           file.length(),
           true /* useFullEditDistance */,
           Locale.ENGLISH,
           dictName,
           false /* isUpdatable */);
   for (final String word : words) {
     final WordProperty wordProperty =
         binaryDictionary.getWordProperty(word, false /* isBeginningOfSentence */);
     assertEquals(word, wordProperty.mWord);
     assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
     if (shortcuts.containsKey(word)) {
       // Check off targets against a private copy. The lists held by sShortcuts are also read
       // by testVer2DictIteration, so removing elements from them in place would leave that
       // fixture empty for whichever test runs afterwards.
       final List<String> remainingShortcuts = new ArrayList<>(shortcuts.get(word));
       assertEquals(remainingShortcuts.size(), wordProperty.mShortcutTargets.size());
       assertTrue(wordProperty.mHasShortcuts);
       for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
         assertTrue(remainingShortcuts.contains(shortcutTarget.mWord));
         assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
         remainingShortcuts.remove(shortcutTarget.mWord);
       }
       // Every expected shortcut must have been matched by a target read from the dictionary.
       assertTrue(remainingShortcuts.isEmpty());
     }
   }
 }
  /**
   * Writes a dictionary built from {@code sWords}, {@code sShortcuts} and {@code sEmptyBigrams}
   * to a file using {@code BinaryDictUtils.VERSION2_OPTIONS}, opens it as a
   * {@link BinaryDictionary}, and walks it with {@code getNextWordProperty} until the returned
   * token is 0 again. The walk must visit every expected word, report the {@code UNIGRAM_FREQ}
   * probability and the expected shortcut targets for each, and yield exactly the expected
   * bigrams.
   */
  public void testVer2DictIteration() {
    final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
    final ArrayList<String> words = sWords;
    final HashMap<String, List<String>> shortcuts = sShortcuts;
    final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
    // NOTE(review): this name is identical to the one used by testVer2DictGetWordProperty and
    // looks copy-pasted; confirm whether the two tests are meant to share it.
    final String dictName = "testGetWordProperty";
    final String dictVersion = Long.toString(System.currentTimeMillis());
    final FusionDictionary dict =
        new FusionDictionary(
            new PtNodeArray(),
            BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
    addUnigrams(words.size(), dict, words, shortcuts);
    addBigrams(dict, words, bigrams);
    final File file =
        BinaryDictUtils.getDictFile(
            dictName, dictVersion, formatOptions, getContext().getCacheDir());
    timeWritingDictToFile(file, dict, formatOptions);
    Log.d(TAG, file.getAbsolutePath());
    final BinaryDictionary binaryDictionary =
        new BinaryDictionary(
            file.getAbsolutePath(),
            0 /* offset */,
            file.length(),
            true /* useFullEditDistance */,
            Locale.ENGLISH,
            dictName,
            false /* isUpdatable */);

    // Expected words and (word0, word1) bigram pairs; entries are removed as the walk finds them.
    final HashSet<String> wordSet = new HashSet<>(words);
    final HashSet<Pair<String, String>> bigramSet = new HashSet<>();

    for (int i = 0; i < words.size(); i++) {
      final List<Integer> bigramList = bigrams.get(i);
      if (bigramList != null) {
        for (final Integer word1Index : bigramList) {
          final String word1 = words.get(word1Index);
          bigramSet.add(new Pair<>(words.get(i), word1));
        }
      }
    }
    // The walk starts with token 0 and ends when the dictionary hands token 0 back.
    int token = 0;
    do {
      final BinaryDictionary.GetNextWordPropertyResult result =
          binaryDictionary.getNextWordProperty(token);
      final WordProperty wordProperty = result.mWordProperty;
      final String word0 = wordProperty.mWord;
      assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
      wordSet.remove(word0);
      if (shortcuts.containsKey(word0)) {
        // Check for null before the first dereference so a missing target list fails as an
        // assertion rather than as a NullPointerException.
        assertNotNull(wordProperty.mShortcutTargets);
        // Check off targets against a private copy. The lists held by sShortcuts are also read
        // by testVer2DictGetWordProperty, so removing elements from them in place would leave
        // that fixture empty for whichever test runs afterwards.
        final List<String> remainingShortcuts = new ArrayList<>(shortcuts.get(word0));
        assertEquals(remainingShortcuts.size(), wordProperty.mShortcutTargets.size());
        for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
          assertTrue(remainingShortcuts.contains(shortcutTarget.mWord));
          assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
          remainingShortcuts.remove(shortcutTarget.mWord);
        }
        assertTrue(remainingShortcuts.isEmpty());
      }
      for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
        final String word1 = wordProperty.mBigrams.get(j).mWord;
        final Pair<String, String> bigram = new Pair<>(word0, word1);
        assertTrue(bigramSet.contains(bigram));
        bigramSet.remove(bigram);
      }
      token = result.mNextToken;
    } while (token != 0);
    // Nothing expected may be left unvisited.
    assertTrue(wordSet.isEmpty());
    assertTrue(bigramSet.isEmpty());
  }