private Optional<org.wikidata.wdtk.datamodel.interfaces.Statement> processGDMStatement( final Statement statement) { final Predicate gdmPredicate = statement.getPredicate(); final Node gdmObject = statement.getObject(); final String propertyValueDataType; final NodeType gdmObjectType = gdmObject.getType(); propertyValueDataType = determineWikidataPropertyValueDataType(gdmPredicate, gdmObjectType); final PropertyIdValue wikidataProperty = processGDMPredicate(gdmPredicate, propertyValueDataType); final Optional<Value> optionalWikidataValue = processGDMObject(gdmObject); if (!optionalWikidataValue.isPresent()) { return Optional.empty(); } final Value wikidataValue = optionalWikidataValue.get(); // create property value pair final ValueSnak snak = Datamodel.makeValueSnak(wikidataProperty, wikidataValue); // process qualified attributes at GDM statement final Optional<List<Snak>> wikidataQualifiers = processGDMQualifiedAttributes(statement); final List<SnakGroup> snakGroups = new ArrayList<>(); if (wikidataQualifiers.isPresent()) { final SnakGroup snakGroup = Datamodel.makeSnakGroup(wikidataQualifiers.get()); snakGroups.add(snakGroup); } final Claim claim = Datamodel.makeClaim(null, snak, snakGroups); final List<Reference> references = new ArrayList<>(); final StatementRank rank = StatementRank.NORMAL; // note: empty string for statement id (this should be utilised for statements that are created) // note: Statement references cannot be null // note: Statement rank cannot be null return Optional.ofNullable(Datamodel.makeStatement(claim, references, rank, "")); }
/**
 * Renders a GDM statement as a single-line, human-readable string (e.g. for log messages).
 *
 * <p>The output contains the statement id, the uuid, the subject, the predicate URI and the
 * object. A missing id or uuid is replaced by a placeholder text.
 *
 * @param statement the GDM statement to render
 * @return the textual representation of the statement
 */
private static String printGDMStatement(final Statement statement) {
  final Long statementId = statement.getId();
  final String idText =
      statementId != null ? String.valueOf(statementId) : "no statement id available";

  final String statementUuid = statement.getUUID();
  final String uuidText = statementUuid != null ? statementUuid : "no uuid available";

  final String subjectText = printGDMNode(statement.getSubject());
  final String predicateText = statement.getPredicate().getUri();
  final String objectText = printGDMNode(statement.getObject());

  return "{statement: id ='"
      + idText
      + "' :: "
      + "uuid = '"
      + uuidText
      + "' :: "
      + "subject = '"
      + subjectText
      + "' :: "
      + "predicate = '"
      + predicateText
      + "' :: "
      + "object = '"
      + objectText
      + "'}";
}
private void processGDMResource(final Resource resource) throws JsonProcessingException, WikidataImporterException { resourceCount.incrementAndGet(); final String resourceURI = resource.getUri(); final List<MonolingualTextValue> labels = generateLabels(resource); final List<MonolingualTextValue> descriptions = generateLabels(resourceURI); final List<MonolingualTextValue> aliases = new ArrayList<>(); final Map<String, List<org.wikidata.wdtk.datamodel.interfaces.Statement>> wikidataStatementsMap = new HashMap<>(); final Set<Statement> gdmStatements = resource.getStatements(); if (gdmStatements != null) { // write statements (if available) for (final Statement gdmStatement : gdmStatements) { statementCount.incrementAndGet(); final String predicateURI = gdmStatement.getPredicate().getUri(); if (!wikidataStatementsMap.containsKey(predicateURI)) { final List<org.wikidata.wdtk.datamodel.interfaces.Statement> wikidataStatements = new ArrayList<>(); wikidataStatementsMap.put(predicateURI, wikidataStatements); } final Optional<org.wikidata.wdtk.datamodel.interfaces.Statement> optionalWikidataStmt = processGDMStatement(gdmStatement); if (!optionalWikidataStmt.isPresent()) { // log non-created statements LOG.debug("could not process statement '{}'", printGDMStatement(gdmStatement)); continue; } final org.wikidata.wdtk.datamodel.interfaces.Statement wikidataStmt = optionalWikidataStmt.get(); wikidataStatementsMap.get(predicateURI).add(wikidataStmt); processedStatementCount.incrementAndGet(); final boolean updated = checkAndOptionallyUpdateBigCounter(statementCount, bigStatementCount); if (updated) { final long currentStatementCount = statementCount.get(); LOG.info( "processed '{}' from '{}' statements", processedStatementCount.get(), currentStatementCount); } } } final List<StatementGroup> statementGroups = new ArrayList<>(); // create statement groups property-wise for (final Map.Entry<String, List<org.wikidata.wdtk.datamodel.interfaces.Statement>> statmentsPerPropertyEntry : 
wikidataStatementsMap.entrySet()) { final List<org.wikidata.wdtk.datamodel.interfaces.Statement> statementsPerProperty = statmentsPerPropertyEntry.getValue(); final StatementGroup statementGroup = Datamodel.makeStatementGroup(statementsPerProperty); statementGroups.add(statementGroup); } final Map<String, SiteLink> siteLinkMap = new HashMap<>(); // we can also create an item with all it's statements at once, i.e., this would save some HTTP // API calls // TODO: check ItemIdValue in map (i.e. whether an wikidata for this gdm resource exists // already, or not; because if it exists already, then we need to update the existing one, i.e., // do a slightly different API call) final ItemDocument wikidataItem = Datamodel.makeItemDocument( null, labels, descriptions, aliases, statementGroups, siteLinkMap); // create item at wikibase (check whether statements are created as well - otherwise we need to // create them separately) final ItemIdValue itemIdValue = createWikidataItem(resourceURI, wikidataItem); // add/update item id value at the resources items map gdmResourceURIWikidataItemMap.putIfAbsent(resourceURI, itemIdValue); final boolean updated = checkAndOptionallyUpdateBigCounter(resourceCount, bigResourceCount); if (updated) { final long currentResourceCount = resourceCount.get(); LOG.info( "processed '{}' resources ('{}' from '{}' statements)", currentResourceCount, processedStatementCount.get(), statementCount.get()); } }