예제 #1
0
  /**
   * Walks over every direct child of {@code div.cc > p} and registers each link found there as a
   * "similar recommendation" on the result builder.
   *
   * <p>A {@code span.flagicon} element switches the language that is applied to all links that
   * follow it: the first of its remaining class names that {@link
   * Language#getExistingLanguageById} resolves to a non-null value becomes the active language. If
   * none resolves, the previously active language is kept. Links that appear before any flag icon
   * are built with a {@code null} language.
   *
   * @param doc the parsed document to scan
   * @param resultBuilder the builder that receives one recommendation per link
   */
  private void findRecommendations(
      @NotNull Document doc, @NotNull BilingualQueryResultBuilder resultBuilder) {
    Language activeLanguage = null;

    for (Element candidate : doc.select("div.cc > p > *")) {
      String tag = candidate.tagName();

      if ("span".equals(tag) && candidate.hasClass("flagicon")) {
        // The marker class itself is never a language id, so it is excluded from the lookup.
        // The stream is lazy: lookups stop at the first class name that yields a language.
        activeLanguage =
            candidate.classNames().stream()
                .filter(name -> !"flagicon".equals(name))
                .map(name -> Language.getExistingLanguageById(name))
                .filter(language -> language != null)
                .findFirst()
                .orElse(activeLanguage);
        continue;
      }

      if (!"a".equals(tag)) {
        // Neither a language marker nor a recommendation link
        continue;
      }

      String linkText = candidate.text();
      DictionaryObjectBuilder recommendation = ImmutableDictionaryObject.builder();
      recommendation.setLanguage(activeLanguage).setGeneralForm(linkText);
      resultBuilder.addSimilarRecommendation(recommendation.build());
    }
  }
예제 #2
0
  /**
   * Reads the inline synonym rows of a translation table and adds them to the result as a single
   * synonym entry.
   *
   * <p>Only the {@code .from} node of each {@code tr.translationInlineSynonymsRow} is evaluated.
   * Each synonym is sorted into a group keyed by its description; synonyms without a description
   * are logged and left out. The text of {@code .translationInlineSynonymsTitle span.highlight}
   * is used as the base object of the entry.
   *
   * <p>Nothing is added to the result if the table has no synonym rows or no synonym title. Rows
   * lacking a {@code .from} node are skipped.
   *
   * @param translationTable the table element to search for synonym rows
   * @param resultBuilder the builder that receives the assembled synonym entry
   * @param sourceLanguage the language assigned to the entry, its groups and its synonyms
   */
  private void extractBilingualSynonyms(
      @NotNull Element translationTable,
      @NotNull BilingualQueryResultBuilder resultBuilder,
      @NotNull Language sourceLanguage) {
    Elements synonymNodes = translationTable.select("tr.translationInlineSynonymsRow");

    if (synonymNodes.isEmpty()) {
      LOGGER.debug("No synonym entries found");
      return;
    }

    // first() yields null when the selector matched nothing, so the title must be checked before
    // its text is read.
    Element titleNode =
        translationTable.select(".translationInlineSynonymsTitle span.highlight").first();
    if (titleNode == null) {
      LOGGER.warn("Synonym rows found, but no synonym title");
      return;
    }
    String synonymEntryTitle = titleNode.text();

    // NOTE: a HashMap is used, so the order of the groups inside the entry is unspecified.
    Map<String, SynonymGroupBuilder> synonymGroupMap = new HashMap<>();

    for (Element synonymNode : synonymNodes) {
      // Extract only information from the "from"-node (i.e. source language)
      Element fromNode = synonymNode.getElementsByClass("from").first();
      if (fromNode == null) {
        // processSingleNode requires a non-null element; skip malformed rows instead of failing
        LOGGER.warn("Synonym row without source language node");
        continue;
      }

      DictionaryObject newSynonym = processSingleNode(fromNode, sourceLanguage, synonymEntryTitle);
      String groupName = newSynonym.getDescription();
      if (groupName != null) {
        SynonymGroupBuilder groupBuilder =
            synonymGroupMap.computeIfAbsent(
                groupName,
                (s) ->
                    ImmutableSynonymGroup.builder()
                        .setBaseMeaning(
                            ImmutableDictionaryObject.createSimpleObject(sourceLanguage, s)));
        groupBuilder.addSynonym(newSynonym);
      } else {
        LOGGER.warn("Synonym group is null");
      }
    }

    SynonymEntryBuilder synonymEntryBuilder =
        ImmutableSynonymEntry.builder()
            .setBaseObject(
                ImmutableDictionaryObject.createSimpleObject(sourceLanguage, synonymEntryTitle));

    for (SynonymGroupBuilder synonymGroupBuilder : synonymGroupMap.values()) {
      synonymEntryBuilder.addSynonymGroup(synonymGroupBuilder.build());
    }

    resultBuilder.addSynonymEntry(synonymEntryBuilder.build());
  }
예제 #3
0
  /**
   * Builds a dictionary object from a single result node.
   *
   * <p>The general form is the element text up to and including the text of the first {@code a}
   * tag found in the element, stripped of surrounding whitespace. If the element contains no
   * {@code a} tag, the complete element text is used instead of failing.
   *
   * <p>Description and gender are filled in by {@code extractDescription} and {@code
   * extractGender}.
   *
   * @param element the node to convert
   * @param language the language assigned to the resulting object
   * @param queryString passed through to {@code extractDescription}
   * @return the built dictionary object
   */
  @NotNull
  private DictionaryObject processSingleNode(
      @NotNull Element element, @NotNull Language language, String queryString) {
    DictionaryObjectBuilder objectBuilder = ImmutableDictionaryObject.builder();
    objectBuilder.setLanguage(language);

    // Extract entry text:
    String elementText = element.text();
    // first() yields null when the element holds no link; look it up once and check it
    Element link = element.getElementsByTag("a").first();
    String generalForm;
    if (link == null) {
      // NOTE(review): without a link there is no marker for where the entry text ends, so the
      // whole text is used; it may contain more than the bare entry - confirm this is acceptable
      generalForm = elementText;
    } else {
      String linkText = link.text();
      // Everything in front of the link text is kept as leading context of the entry
      generalForm = StringUtils.substringBefore(elementText, linkText) + linkText;
    }
    objectBuilder.setGeneralForm(StringUtils.strip(generalForm));

    // Extract description:
    extractDescription(element, queryString, objectBuilder);

    // Extract gender:
    extractGender(element, objectBuilder);

    return objectBuilder.build();
  }