private void findRecommendations( @NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) { // Determine all candidate nodes: Elements alternativeNodes = doc.select("div.cc > p > *"); Language currentLanguage = null; for (Element node : alternativeNodes) { // If the next node is a flagicon, try to determine the language for the next entries from the // class name if (node.tagName().equals("span") && node.hasClass("flagicon")) { Set<String> classNames = node.classNames(); classNames.remove("flagicon"); for (String className : classNames) { Language candidate = Language.getExistingLanguageById(className); if (candidate != null) { currentLanguage = candidate; break; } } } else if (node.tagName().equals("a")) { String recommendationText = node.text(); DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder(); objectBuilder.setLanguage(currentLanguage).setGeneralForm(recommendationText); resultBuilder.addSimilarRecommendation(objectBuilder.build()); } } }
private void extractBilingualSynonyms( @NotNull Element translationTable, @NotNull BilingualQueryResultBuilder resultBuilder, @NotNull Language sourceLanguage) { Elements synonymNodes = translationTable.select("tr.translationInlineSynonymsRow"); if (synonymNodes.size() == 0) { LOGGER.debug("No synonym entries found"); return; } String synonymEntryTitle = translationTable.select(".translationInlineSynonymsTitle span.highlight").first().text(); Map<String, SynonymGroupBuilder> synonymGroupMap = new HashMap<>(); for (Element synonymNode : synonymNodes) { // Extract only information from the "from"-node (i.e. source language) Element fromNode = synonymNode.getElementsByClass("from").first(); DictionaryObject newSynonym = processSingleNode(fromNode, sourceLanguage, synonymEntryTitle); String groupName = newSynonym.getDescription(); if (groupName != null) { SynonymGroupBuilder groupBuilder = synonymGroupMap.computeIfAbsent( groupName, (s) -> ImmutableSynonymGroup.builder() .setBaseMeaning( ImmutableDictionaryObject.createSimpleObject(sourceLanguage, s))); groupBuilder.addSynonym(newSynonym); } else { LOGGER.warn("Synonym group is null"); } } SynonymEntryBuilder synonymEntryBuilder = ImmutableSynonymEntry.builder() .setBaseObject( ImmutableDictionaryObject.createSimpleObject(sourceLanguage, synonymEntryTitle)); for (SynonymGroupBuilder synonymGroupBuilder : synonymGroupMap.values()) { synonymEntryBuilder.addSynonymGroup(synonymGroupBuilder.build()); } resultBuilder.addSynonymEntry(synonymEntryBuilder.build()); }
@NotNull private DictionaryObject processSingleNode( @NotNull Element element, @NotNull Language language, String queryString) { DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder(); objectBuilder.setLanguage(language); // Extract entry text: String context = StringUtils.substringBefore(element.text(), element.getElementsByTag("a").first().text()); String generalForm = context + element.getElementsByTag("a").first().text(); objectBuilder.setGeneralForm(StringUtils.strip(generalForm)); // Extract description: extractDescription(element, queryString, objectBuilder); // Extract gender: extractGender(element, objectBuilder); return objectBuilder.build(); }