Пример #1
0
  /**
   * Builds a {@link TextAnalysis} for the given text: extracts its words and sentences through
   * the semantic service, looks the unique words up through the model service, and picks the
   * language that has the most distinct matched words.
   *
   * @param text the text to analyse
   * @return an analysis carrying the sentences, the unique words, the matched word perspectives,
   *     the unique words that had no match, the matched words grouped by language, and the
   *     dominant language ({@code null} when no matched perspective has a language)
   * @throws ModelException declared by this method; presumably raised by the model lookup —
   *     confirm against {@code modelService.list}
   */
  public TextAnalysis analyse(String text) throws ModelException {
    String[] words = semanticService.getWords(text);
    // The return value is ignored, so this is assumed to lower-case the array in place.
    semanticService.lowercaseWords(words);
    List<String> uniqueWords = Strings.asList(semanticService.getUniqueWords(words));

    WordListPerspectiveQuery query = new WordListPerspectiveQuery().withWords(uniqueWords);
    List<WordListPerspective> list = modelService.list(query);

    Set<String> knownWords = new HashSet<String>();
    Multimap<String, String> wordsByLanguage = HashMultimap.create();
    for (WordListPerspective perspective : list) {
      // NOTE(review): default-locale lower-casing; it has to agree with what
      // semanticService.lowercaseWords does for the contains() check below to match — confirm.
      String word = perspective.getText().toLowerCase();
      knownWords.add(word);
      if (perspective.getLanguage() != null) {
        wordsByLanguage.put(perspective.getLanguage(), word);
      }
    }

    // Pick the language with the most distinct words. keySet() visits every language exactly
    // once, whereas keys() would repeat a language once per word mapped to it. The comparison is
    // strict, so on a tie the language encountered first in iteration order is kept.
    String language = null;
    for (String lang : wordsByLanguage.keySet()) {
      if (language == null
          || wordsByLanguage.get(lang).size() > wordsByLanguage.get(language).size()) {
        language = lang;
      }
    }

    // Unique words of the text that no returned perspective matched.
    List<String> unknownWords = Lists.newArrayList();
    for (String word : uniqueWords) {
      if (!knownWords.contains(word)) {
        unknownWords.add(word);
      }
    }

    // NOTE(review): sentence splitting always uses English, regardless of the language detected
    // above — confirm whether that is intended.
    Locale possibleLocale = Locale.ENGLISH;
    String[] sentences = semanticService.getSentences(text, possibleLocale);

    TextAnalysis analysis = new TextAnalysis();
    analysis.setLanguage(language);
    analysis.setSentences(Strings.asList(sentences));
    analysis.setWordsByLanguage(wordsByLanguage.asMap());
    analysis.setUniqueWords(uniqueWords);
    analysis.setKnownWords(list);
    analysis.setUnknownWords(unknownWords);
    return analysis;
  }
Пример #2
0
  /**
   * Tallies languages over the given perspectives: each distinct pair of lower-cased text and
   * language contributes one {@code addOne} call for that language. Perspectives whose language
   * is {@code null} are skipped.
   *
   * @param perspectives the word perspectives to tally
   * @return a counter that received one {@code addOne(language)} per distinct word/language pair
   */
  public Counter<String> countLanguages(List<WordListPerspective> perspectives) {
    // word -> languages; the set-based multimap stores a repeated (word, language) pair only once.
    Multimap<String, String> languagesByWord = HashMultimap.create();
    for (WordListPerspective entry : perspectives) {
      if (entry.getLanguage() == null) {
        continue;
      }
      languagesByWord.put(entry.getText().toLowerCase(), entry.getLanguage());
    }

    // values() yields one element per stored (word, language) pair.
    Counter<String> tally = new Counter<String>();
    for (String language : languagesByWord.values()) {
      tally.addOne(language);
    }
    return tally;
  }