예제 #1
0
  /**
   * Analyses a text: extracts its words and sentences, looks the unique words up in the
   * model, and works out which words are known, which are unknown and which language
   * most of the known words belong to.
   *
   * @param text the text to analyse
   * @return an analysis holding the dominant language (or {@code null} when no known word
   *     carries a language), the sentences, the words grouped by language, the unique
   *     words, the matching word perspectives and the unknown words
   * @throws ModelException declared by this method; presumably raised by the model query
   */
  public TextAnalysis analyse(String text) throws ModelException {
    String[] words = semanticService.getWords(text);
    // Return value is not used, so this presumably lowercases the array in place.
    semanticService.lowercaseWords(words);
    List<String> uniqueWords = Strings.asList(semanticService.getUniqueWords(words));

    WordListPerspectiveQuery query = new WordListPerspectiveQuery().withWords(uniqueWords);
    List<WordListPerspective> list = modelService.list(query);

    Set<String> knownWords = new HashSet<String>();
    Multimap<String, String> wordsByLanguage = HashMultimap.create();
    for (WordListPerspective perspective : list) {
      // NOTE(review): toLowerCase() uses the default locale; confirm this matches
      // whatever semanticService.lowercaseWords does, or the lookup below can miss.
      String word = perspective.getText().toLowerCase();
      knownWords.add(word);
      if (perspective.getLanguage() != null) {
        wordsByLanguage.put(perspective.getLanguage(), word);
      }
    }

    // Pick the language with the most distinct words. Iterating keySet() visits each
    // language once; keys() would repeat a language once per word mapped to it.
    // On a tie the language encountered first is kept.
    String language = null;
    for (String lang : wordsByLanguage.keySet()) {
      if (language == null
          || (wordsByLanguage.get(lang).size() > wordsByLanguage.get(language).size())) {
        language = lang;
      }
    }

    List<String> unknownWords = Lists.newArrayList();
    for (String word : uniqueWords) {
      if (!knownWords.contains(word)) {
        unknownWords.add(word);
      }
    }

    // NOTE(review): sentence splitting always uses English, regardless of the language
    // detected above - confirm whether the detected language should drive this.
    Locale possibleLocale = Locale.ENGLISH;
    String[] sentences = semanticService.getSentences(text, possibleLocale);

    TextAnalysis analysis = new TextAnalysis();
    analysis.setLanguage(language);
    analysis.setSentences(Strings.asList(sentences));
    analysis.setWordsByLanguage(wordsByLanguage.asMap());
    analysis.setUniqueWords(uniqueWords);
    analysis.setKnownWords(list);
    analysis.setUnknownWords(unknownWords);
    return analysis;
  }