/**
 * Parses a JSON response to extract an entity document.
 * <p>
 * TODO This method currently contains code to work around Wikibase issue
 * https://phabricator.wikimedia.org/T73349. This should be removed once the
 * issue is fixed.
 *
 * @param entityNode
 *            the JSON node that should contain the entity document data
 * @return the entity document, or null if there were unrecoverable errors
 * @throws IOException
 *             if the JSON data could not be read
 * @throws JsonProcessingException
 *             if the JSON data could not be mapped to an entity document
 */
private EntityDocument parseJsonResponse(JsonNode entityNode)
        throws JsonProcessingException, IOException {
    try {
        JacksonTermedStatementDocument ed = mapper.treeToValue(entityNode,
                JacksonTermedStatementDocument.class);
        ed.setSiteIri(this.siteIri);
        return ed;
    } catch (JsonProcessingException e) {
        logger.warn("Error when reading JSON for entity "
                + entityNode.path("id").asText("UNKNOWN") + ": "
                + e.toString()
                + "\nTrying to manually fix issue https://phabricator.wikimedia.org/T73349.");
        // Wikibase sometimes serializes empty term and claim maps as [] rather
        // than {} (T73349); rewrite them so Jackson can map them to Java maps.
        String jsonString = entityNode.toString()
                .replace("\"sitelinks\":[]", "\"sitelinks\":{}")
                .replace("\"labels\":[]", "\"labels\":{}")
                .replace("\"aliases\":[]", "\"aliases\":{}")
                .replace("\"claims\":[]", "\"claims\":{}")
                .replace("\"descriptions\":[]", "\"descriptions\":{}");
        ObjectReader documentReader = this.mapper
                .reader(JacksonTermedStatementDocument.class);
        JacksonTermedStatementDocument ed = documentReader.readValue(jsonString);
        ed.setSiteIri(this.siteIri);
        return ed;
    }
}
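/*
 * Illustrative sketch, not part of the original class: the T73349 workaround
 * above relies on plain string replacement to turn empty JSON arrays into
 * empty JSON objects before re-parsing. The standalone snippet below
 * reproduces just that rewriting step on an invented response fragment so its
 * effect can be inspected in isolation; only standard Jackson classes are
 * used, and the input string is hypothetical.
 */
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

class T73349WorkaroundSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical response fragment with empty term maps serialized as [].
        String broken = "{\"id\":\"Q42\",\"labels\":[],\"descriptions\":[],"
                + "\"aliases\":[],\"claims\":[],\"sitelinks\":[]}";

        // Same replacements as in parseJsonResponse above.
        String fixed = broken
                .replace("\"sitelinks\":[]", "\"sitelinks\":{}")
                .replace("\"labels\":[]", "\"labels\":{}")
                .replace("\"aliases\":[]", "\"aliases\":{}")
                .replace("\"claims\":[]", "\"claims\":{}")
                .replace("\"descriptions\":[]", "\"descriptions\":{}");

        ObjectMapper mapper = new ObjectMapper();
        JsonNode node = mapper.readTree(fixed);
        // After the rewrite, the term containers are JSON objects, which is
        // what Jackson expects when mapping them to Java maps.
        System.out.println(node.get("labels").isObject()); // prints: true
    }
}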
public WikidataEntity parse(String json) throws WpParseException {
    JacksonTermedStatementDocument mwDoc;
    try {
        mwDoc = mapper.readValue(json, JacksonTermedStatementDocument.class);
    } catch (IOException e) {
        LOG.info("Error parsing: " + json);
        throw new WpParseException(e);
    }

    WikidataEntity record = new WikidataEntity(mwDoc.getEntityId().getId());

    // Aliases (multiple per language)
    for (List<MonolingualTextValue> vlist : mwDoc.getAliases().values()) {
        if (vlist.isEmpty()) continue;
        if (!validLanguage(vlist.get(0).getLanguageCode())) continue;
        Language lang = Language.getByLangCodeLenient(vlist.get(0).getLanguageCode());
        record.getAliases().put(lang, new ArrayList<String>());
        for (MonolingualTextValue v : vlist) {
            record.getAliases().get(lang).add(v.getText());
        }
    }

    // Descriptions (one per language)
    for (MonolingualTextValue v : mwDoc.getDescriptions().values()) {
        if (validLanguage(v.getLanguageCode())) {
            Language lang = Language.getByLangCodeLenient(v.getLanguageCode());
            record.getDescriptions().put(lang, v.getText());
        }
    }

    // Labels (one per language)
    for (MonolingualTextValue v : mwDoc.getLabels().values()) {
        if (validLanguage(v.getLanguageCode())) {
            Language lang = Language.getByLangCodeLenient(v.getLanguageCode());
            record.getLabels().put(lang, v.getText());
        }
    }

    // Claims (only for Item entities)
    if (mwDoc instanceof JacksonItemDocument) {
        for (List<JacksonStatement> statements : ((JacksonItemDocument) mwDoc).getJsonClaims().values()) {
            for (JacksonStatement s : statements) {
                record.getStatements().add(parseOneClaim(record, s));
            }
        }
    }

    return record;
}
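/*
 * Hypothetical usage sketch, not from the original source: it assumes the
 * parse(String) method above lives on a parser class, here called
 * WikidataParser, and that the getters used below match those already seen in
 * this file (getLabels(), getDescriptions(), getAliases()). The JSON literal
 * is an invented, minimal entity document.
 */
void printEnglishLabel(WikidataParser parser) throws WpParseException {
    String json = "{\"id\":\"Q42\",\"type\":\"item\","
            + "\"labels\":{\"en\":{\"language\":\"en\",\"value\":\"Douglas Adams\"}},"
            + "\"descriptions\":{},\"aliases\":{},\"claims\":{},\"sitelinks\":{}}";

    WikidataEntity entity = parser.parse(json);

    // Labels and descriptions are keyed by Language; aliases map to a list of strings.
    Language en = Language.getByLangCodeLenient("en");
    System.out.println(entity.getLabels().get(en));       // "Douglas Adams"
    System.out.println(entity.getDescriptions().get(en)); // null (none given in the sample)
}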
/**
 * Creates a map of identifiers or page titles to documents retrieved via
 * the API. All parameters that accept lists expect the pipe character | to
 * be used as a separator, as created by
 * {@link ApiConnection#implodeObjects(Iterable)}. There is a limit on how
 * many entities can be retrieved in one request, usually 50 by default and
 * 500 for bots. This limit may also apply to the number of language codes
 * and sites used for filtering.
 * <p>
 * The method can fail in two ways. If errors occur (e.g., exceptions while
 * trying to access the Web API), then the errors will be logged and null
 * will be returned. If the request is made but the API returns errors,
 * then the errors will be logged and an empty map is returned.
 *
 * @param ids
 *            list of ids of entities for which data should be retrieved
 * @param sites
 *            site key (e.g. "enwiki"); used together with parameter
 *            "titles"; the API supports the use of many site keys with a
 *            single title, but this implementation does not support this
 *            (the resulting map will use title strings for keys)
 * @param titles
 *            list of titles of the pages corresponding to the requested
 *            entities on the given site; use together with "sites", but
 *            only give one site for several titles or several sites for
 *            one title
 * @param props
 *            list of strings that specifies what kind of data should be
 *            retrieved for each entity; possible values include "info",
 *            "sitelinks", "sitelinks/urls", "aliases", "labels",
 *            "descriptions", "claims" (statements), "datatype"; additional
 *            filters may apply; defaults to
 *            "info|sitelinks|aliases|labels|descriptions|claims|datatype"
 * @param languages
 *            list of language codes to return labels, aliases or
 *            descriptions for; if omitted, data for all languages is
 *            returned
 * @param sitefilter
 *            list of site keys to return sitelinks for; if omitted, data
 *            for all sites is returned
 * @return map of document identifiers or titles to documents retrieved via
 *         the API URL, or null if there were errors
 * @throws MediaWikiApiErrorException
 *             if the API returns an error
 * @throws IllegalArgumentException
 *             if the given combination of parameters does not make sense
 */
public Map<String, EntityDocument> wbGetEntities(String ids, String sites,
        String titles, String props, String languages, String sitefilter)
        throws MediaWikiApiErrorException {

    Map<String, String> parameters = new HashMap<String, String>();
    parameters.put(ApiConnection.PARAM_ACTION, "wbgetentities");

    if (ids != null) {
        parameters.put("ids", ids);
        if (titles != null || sites != null) {
            throw new IllegalArgumentException(
                    "Cannot use parameters \"sites\" or \"titles\" when using ids to get entity data");
        }
    } else if (titles != null) {
        parameters.put("titles", titles);
        if (sites == null) {
            throw new IllegalArgumentException(
                    "Sites parameter is required when using titles parameter to get entity data.");
        }
        parameters.put("sites", sites);
    } else {
        throw new IllegalArgumentException(
                "Either ids, or titles and site must be specified for this action.");
    }

    if (props != null) {
        parameters.put("props", props);
    }
    if (languages != null) {
        parameters.put("languages", languages);
    }
    if (sitefilter != null) {
        parameters.put("sitefilter", sitefilter);
    }

    parameters.put(ApiConnection.PARAM_FORMAT, "json");

    try (InputStream response = this.connection.sendRequest("POST", parameters)) {
        JsonNode root = mapper.readTree(response);
        Map<String, EntityDocument> result = new HashMap<String, EntityDocument>();

        this.connection.checkErrors(root);
        this.connection.logWarnings(root);

        JsonNode entities = root.path("entities");
        for (JsonNode entityNode : entities) {
            if (!entityNode.has("missing")) {
                try {
                    JacksonTermedStatementDocument ed = mapper.treeToValue(
                            entityNode, JacksonTermedStatementDocument.class);
                    ed.setSiteIri(this.siteIri);
                    if (titles == null) {
                        // Entities were requested by id: key the map by entity id.
                        result.put(ed.getEntityId().getId(), ed);
                    } else {
                        // Entities were requested by site/title: key the map by page title.
                        if (ed instanceof JacksonItemDocument
                                && ((JacksonItemDocument) ed).getSiteLinks()
                                        .containsKey(sites)) {
                            result.put(((JacksonItemDocument) ed).getSiteLinks()
                                    .get(sites).getPageTitle(), ed);
                        }
                    }
                } catch (JsonProcessingException e) {
                    logger.error("Error when reading JSON for entity "
                            + entityNode.path("id").asText("UNKNOWN") + ": "
                            + e.toString());
                }
            }
        }
        return result;
    } catch (IOException e) {
        logger.error("Could not retrieve data: " + e.toString());
        return null;
    }
}
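/*
 * Hypothetical call sketch, not part of the original class: it assumes an
 * instance of the class containing wbGetEntities above, passed in here under
 * the invented placeholder type name EntityDataFetcher. The pipe-separated id
 * and title strings follow the parameter conventions described in the Javadoc;
 * the specific Q-ids, site key and page title are illustrative only.
 */
void fetchExamples(EntityDataFetcher fetcher) throws MediaWikiApiErrorException {
    // Request two entities by id; the keys of the result map are entity ids.
    Map<String, EntityDocument> byId = fetcher.wbGetEntities(
            "Q42|Q64", null, null, "info|labels|descriptions", "en|de", null);
    if (byId != null) {
        EntityDocument q42 = byId.get("Q42");
        System.out.println(q42 != null ? q42.getEntityId() : "not found");
    }

    // Request one entity by site key and page title; the keys are page titles.
    Map<String, EntityDocument> byTitle = fetcher.wbGetEntities(
            null, "enwiki", "Douglas Adams", null, null, "enwiki");
    if (byTitle != null) {
        System.out.println(byTitle.keySet());
    }
}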