/**
   * Converts the JSON description of a single entity into an entity document.
   *
   * <p>The node is first mapped directly to a {@link JacksonTermedStatementDocument}. If that
   * fails with a {@link JsonProcessingException}, a warning is logged and a second attempt is made
   * on a patched copy of the JSON text in which empty arrays given for "sitelinks", "labels",
   * "aliases", "claims" and "descriptions" are replaced by empty objects.
   *
   * <p>TODO The second attempt is a workaround for Wikibase issue
   * https://phabricator.wikimedia.org/T73349. It should be removed once the issue is fixed.
   *
   * @param entityNode the JSON node that should contain the entity document data
   * @return the entity document, after its site IRI has been set to this object's site IRI
   * @throws JsonProcessingException if the patched JSON still cannot be processed
   * @throws IOException if reading the patched JSON fails
   */
  private EntityDocument parseJsonResponse(JsonNode entityNode)
      throws JsonProcessingException, IOException {
    JacksonTermedStatementDocument document;
    try {
      document = mapper.treeToValue(entityNode, JacksonTermedStatementDocument.class);
    } catch (JsonProcessingException e) {
      String entityId = entityNode.path("id").asText("UNKNOWN");
      logger.warn(
          "Error when reading JSON for entity "
              + entityId
              + ": "
              + e.toString()
              + "\nTrying to manually fix issue https://phabricator.wikimedia.org/T73349.");

      // Each pair is {broken serialization, repaired serialization}; applied in this order.
      String[][] emptyArrayFixes = {
        {"\"sitelinks\":[]", "\"sitelinks\":{}"},
        {"\"labels\":[]", "\"labels\":{}"},
        {"\"aliases\":[]", "\"aliases\":{}"},
        {"\"claims\":[]", "\"claims\":{}"},
        {"\"descriptions\":[]", "\"descriptions\":{}"}
      };
      String patchedJson = entityNode.toString();
      for (String[] fix : emptyArrayFixes) {
        patchedJson = patchedJson.replace(fix[0], fix[1]);
      }

      // Second attempt; any failure here propagates to the caller.
      document =
          this.mapper.reader(JacksonTermedStatementDocument.class).readValue(patchedJson);
    }

    document.setSiteIri(this.siteIri);
    return document;
  }
Пример #2
0
  /**
   * Builds a {@link WikidataEntity} from the JSON serialization of an entity document.
   *
   * <p>The JSON is mapped to a {@link JacksonTermedStatementDocument}. Aliases, descriptions and
   * labels are copied for every language code accepted by {@code validLanguage}; for item
   * documents, each claim is additionally converted with {@code parseOneClaim}.
   *
   * @param json JSON text of one entity document
   * @return the populated entity record
   * @throws WpParseException if the JSON cannot be read; the underlying exception is wrapped
   */
  public WikidataEntity parse(String json) throws WpParseException {
    final JacksonTermedStatementDocument parsedDoc;
    try {
      parsedDoc = mapper.readValue(json, JacksonTermedStatementDocument.class);
    } catch (IOException e) {
      LOG.info("Error parsing: " + json);
      throw new WpParseException(e);
    }

    WikidataEntity record = new WikidataEntity(parsedDoc.getEntityId().getId());

    // Aliases: several per language; the language is taken from the first alias of each group.
    for (List<MonolingualTextValue> aliasGroup : parsedDoc.getAliases().values()) {
      if (aliasGroup.isEmpty()) {
        continue;
      }
      String aliasLangCode = aliasGroup.get(0).getLanguageCode();
      if (!validLanguage(aliasLangCode)) {
        continue;
      }
      Language aliasLang = Language.getByLangCodeLenient(aliasLangCode);
      ArrayList<String> aliasTexts = new ArrayList<String>();
      record.getAliases().put(aliasLang, aliasTexts);
      for (MonolingualTextValue alias : aliasGroup) {
        aliasTexts.add(alias.getText());
      }
    }

    // Descriptions: one per language.
    for (MonolingualTextValue description : parsedDoc.getDescriptions().values()) {
      String descriptionLangCode = description.getLanguageCode();
      if (!validLanguage(descriptionLangCode)) {
        continue;
      }
      record
          .getDescriptions()
          .put(Language.getByLangCodeLenient(descriptionLangCode), description.getText());
    }

    // Labels: one per language.
    for (MonolingualTextValue label : parsedDoc.getLabels().values()) {
      String labelLangCode = label.getLanguageCode();
      if (!validLanguage(labelLangCode)) {
        continue;
      }
      record.getLabels().put(Language.getByLangCodeLenient(labelLangCode), label.getText());
    }

    // Claims are only read from item documents.
    if (parsedDoc instanceof JacksonItemDocument) {
      JacksonItemDocument itemDoc = (JacksonItemDocument) parsedDoc;
      for (List<JacksonStatement> claimGroup : itemDoc.getJsonClaims().values()) {
        for (JacksonStatement claim : claimGroup) {
          record.getStatements().add(parseOneClaim(record, claim));
        }
      }
    }

    return record;
  }
  /**
   * Retrieves entity documents through the "wbgetentities" API action and returns them in a map
   * keyed by entity identifier or, when titles are used, by page title. All parameters that
   * accept lists expect the pipe character | to be used as a separator, as created by {@link
   * ApiConnection#implodeObjects(Iterable)}. There is a limit on how many entities can be
   * retrieved in one request, usually 50 by default and 500 for bots. This limit may also apply
   * to the number of language codes and sites used for filtering.
   *
   * <p>Outcome by situation:
   *
   * <ul>
   *   <li>If an {@link IOException} occurs while sending the request or reading the response, the
   *       problem is logged and null is returned.
   *   <li>The response is passed to the connection's error check before any entity is read;
   *       presumably this is where a {@link MediaWikiApiErrorException} is raised for errors
   *       reported by the API.
   *   <li>Entities marked as "missing" in the response are skipped. Entities whose JSON cannot be
   *       mapped to a document are logged and left out of the result.
   *   <li>When titles are used, only item documents that have a site link for the given site are
   *       included.
   * </ul>
   *
   * @param ids list of ids of entities for which data should be retrieved; cannot be combined
   *     with "titles" or "sites"
   * @param sites site key (e.g. "enwiki"); required together with parameter "titles"; the API
   *     supports the use of many site keys with a single title, but this implementation does not
   *     support this (the resulting map will use title strings for keys)
   * @param titles list of titles of the pages corresponding to the requested entities on the
   *     given site; use together with 'sites'
   * @param props list of strings that specifies what kind of data should be retrieved for each
   *     entity; possible values include "info", "sitelinks", "sitelinks/urls", "aliases",
   *     "labels", "descriptions", "claims" (statements), "datatype"; additional filters may
   *     apply; if null, the parameter is not sent and the API default applies
   * @param languages list of language codes to return labels, aliases or descriptions for; if
   *     null, the parameter is not sent
   * @param sitefilter list of site keys to return sitelinks for; if null, the parameter is not
   *     sent
   * @return map of document identifiers or titles to the documents retrieved, or null if an
   *     {@link IOException} occurred
   * @throws MediaWikiApiErrorException if the API returns an error
   * @throws IllegalArgumentException if the given combination of parameters does not make sense
   */
  public Map<String, EntityDocument> wbGetEntities(
      String ids, String sites, String titles, String props, String languages, String sitefilter)
      throws MediaWikiApiErrorException {

    // Reject unusable parameter combinations before assembling the request.
    if (ids == null && titles == null) {
      throw new IllegalArgumentException(
          "Either ids, or titles and site must be specified for this action.");
    }
    if (ids != null && (titles != null || sites != null)) {
      throw new IllegalArgumentException(
          "Cannot use parameters \"sites\" or \"titles\" when using ids to get entity data");
    }
    if (ids == null && sites == null) {
      throw new IllegalArgumentException(
          "Sites parameter is required when using titles parameter to get entity data.");
    }

    Map<String, String> requestParams = new HashMap<>();
    requestParams.put(ApiConnection.PARAM_ACTION, "wbgetentities");
    if (ids != null) {
      requestParams.put("ids", ids);
    } else {
      requestParams.put("titles", titles);
      requestParams.put("sites", sites);
    }

    // Optional filters are only sent when the caller supplied them.
    String[][] optionalParams = {
      {"props", props}, {"languages", languages}, {"sitefilter", sitefilter}
    };
    for (String[] optionalParam : optionalParams) {
      if (optionalParam[1] != null) {
        requestParams.put(optionalParam[0], optionalParam[1]);
      }
    }

    requestParams.put(ApiConnection.PARAM_FORMAT, "json");

    try (InputStream response = this.connection.sendRequest("POST", requestParams)) {
      JsonNode root = mapper.readTree(response);

      this.connection.checkErrors(root);
      this.connection.logWarnings(root);

      Map<String, EntityDocument> documents = new HashMap<>();
      for (JsonNode entityNode : root.path("entities")) {
        if (entityNode.has("missing")) {
          continue;
        }
        try {
          JacksonTermedStatementDocument document =
              mapper.treeToValue(entityNode, JacksonTermedStatementDocument.class);
          document.setSiteIri(this.siteIri);

          if (titles == null) {
            // Requested by id: key the result by entity id.
            documents.put(document.getEntityId().getId(), document);
          } else if (document instanceof JacksonItemDocument) {
            // Requested by title: key by the page title on the requested site, if linked.
            JacksonItemDocument itemDocument = (JacksonItemDocument) document;
            if (itemDocument.getSiteLinks().containsKey(sites)) {
              documents.put(itemDocument.getSiteLinks().get(sites).getPageTitle(), document);
            }
          }
        } catch (JsonProcessingException e) {
          // A single unreadable entity does not abort the whole request.
          logger.error(
              "Error when reading JSON for entity "
                  + entityNode.path("id").asText("UNKNOWN")
                  + ": "
                  + e.toString());
        }
      }
      return documents;
    } catch (IOException e) {
      logger.error("Could not retrive data: " + e.toString());
      return null;
    }
  }