/**
 * Analyses a text: collects its unique words, looks them up in the model and
 * reports which words are known, which are unknown, and the language that the
 * largest number of distinct known words belongs to.
 *
 * @param text the text to analyse
 * @return the analysis; its language is {@code null} when none of the
 *         looked-up words carries a language
 * @throws ModelException if the model lookup fails (presumably raised by
 *         {@code modelService.list})
 */
public TextAnalysis analyse(String text) throws ModelException {
    String[] words = semanticService.getWords(text);
    // NOTE(review): the return value is ignored, so this presumably lower-cases
    // the array in place -- confirm against the service implementation.
    semanticService.lowercaseWords(words);
    List<String> uniqueWords = Strings.asList(semanticService.getUniqueWords(words));

    WordListPerspectiveQuery query = new WordListPerspectiveQuery().withWords(uniqueWords);
    List<WordListPerspective> list = modelService.list(query);

    // Index the lookup result: every returned word counts as known, and words
    // that carry a language are additionally grouped under that language.
    Set<String> knownWords = new HashSet<String>();
    Multimap<String, String> wordsByLanguage = HashMultimap.create();
    for (WordListPerspective perspective : list) {
        // Lower-cased with the JVM default locale.
        String word = perspective.getText().toLowerCase();
        knownWords.add(word);
        if (perspective.getLanguage() != null) {
            wordsByLanguage.put(perspective.getLanguage(), word);
        }
    }

    // Pick the language with the most distinct words. keySet() visits each
    // language exactly once, whereas keys() repeats a language once per word
    // mapped to it. The strict '>' keeps the first language seen on a tie.
    String language = null;
    int mostWords = 0;
    for (String lang : wordsByLanguage.keySet()) {
        int count = wordsByLanguage.get(lang).size();
        if (language == null || count > mostWords) {
            language = lang;
            mostWords = count;
        }
    }

    List<String> unknownWords = Lists.newArrayList();
    for (String word : uniqueWords) {
        if (!knownWords.contains(word)) {
            unknownWords.add(word);
        }
    }

    // NOTE(review): sentence splitting always uses English, regardless of the
    // language detected above.
    Locale possibleLocale = Locale.ENGLISH;
    String[] sentences = semanticService.getSentences(text, possibleLocale);

    TextAnalysis analysis = new TextAnalysis();
    analysis.setLanguage(language);
    analysis.setSentences(Strings.asList(sentences));
    analysis.setWordsByLanguage(wordsByLanguage.asMap());
    analysis.setUniqueWords(uniqueWords);
    // The "known words" of the analysis are the perspectives returned by the lookup.
    analysis.setKnownWords(list);
    analysis.setUnknownWords(unknownWords);
    return analysis;
}
/**
 * Counts, per language, how many distinct lower-cased words in the given
 * perspectives are associated with that language. Perspectives without a
 * language are ignored.
 *
 * @param perspectives the word perspectives to inspect
 * @return a counter keyed by language
 */
public Counter<String> countLanguages(List<WordListPerspective> perspectives) {
    // The set-valued multimap collapses repeated (word, language) pairs, so a
    // word contributes at most once to each language.
    Multimap<String, String> languagesByWord = HashMultimap.create();
    for (WordListPerspective item : perspectives) {
        String lang = item.getLanguage();
        if (lang == null) {
            continue;
        }
        languagesByWord.put(item.getText().toLowerCase(), lang);
    }

    // values() yields one element per stored (word, language) pair.
    Counter<String> tally = new Counter<String>();
    for (String lang : languagesByWord.values()) {
        tally.addOne(lang);
    }
    return tally;
}