/**
 * Indexes every subject resource of a Jena model into Elasticsearch.
 *
 * <p>The method works in two passes. It first walks all statements of the model and keeps the
 * predicates accepted by the property filter ({@link #rdfPropList} interpreted as a white- or
 * black-list through {@link #isWhitePropList}, plus any property that has an entry in
 * {@code normalizeProp}). It then turns each subject into a JSON document and queues an index
 * request for it, sending the queued requests every {@code EEASettings.DEFAULT_BULK_SIZE}
 * documents and once more at the end for the remainder.
 *
 * @param model the model to index
 * @param bulkRequest the bulk builder that receives the first batch of index requests; later
 *     batches use fresh builders obtained from the client
 * @param getPropLabel if set to true all URI property values will be indexed as their label. The
 *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
 */
private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
  final long startedAt = System.currentTimeMillis();

  // Pass 1: gather the predicates that survive the property filter.
  HashSet<Property> acceptedProps = new HashSet<Property>();
  for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
    Property predicate = statements.nextStatement().getPredicate();
    String predicateName = predicate.toString();

    // A listed property is accepted in white-list mode, an unlisted one in black-list mode.
    boolean accepted =
        rdfPropList.isEmpty()
            || isWhitePropList == rdfPropList.contains(predicateName)
            || normalizeProp.containsKey(predicateName);
    if (accepted) {
      acceptedProps.add(predicate);
    }
  }

  // Pass 2: one index request per subject, sent in batches.
  BulkRequestBuilder pending = bulkRequest;
  long queuedDocs = 0;
  for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
    Resource subject = subjects.nextResource();
    Map<String, ArrayList<String>> document =
        getJsonMap(subject, acceptedProps, model, getPropLabel);

    pending.add(
        client
            .prepareIndex(indexName, typeName, subject.toString())
            .setSource(mapToString(document)));
    queuedDocs++;

    // Only send once a full batch of DEFAULT_BULK_SIZE requests has accumulated.
    if (queuedDocs % EEASettings.DEFAULT_BULK_SIZE != 0) {
      continue;
    }
    BulkResponse batchResponse = pending.execute().actionGet();
    // Start over with an empty builder before looking at the outcome.
    pending = client.prepareBulk();
    if (batchResponse.hasFailures()) {
      processBulkResponseFailure(batchResponse);
    }
  }

  // Send whatever did not fill a complete batch.
  if (pending.numberOfActions() > 0) {
    BulkResponse tailResponse = pending.execute().actionGet();
    if (tailResponse.hasFailures()) {
      processBulkResponseFailure(tailResponse);
    }
  }

  double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
  logger.info(
      "Indexed {} documents on {}/{} in {} seconds",
      queuedDocs,
      indexName,
      typeName,
      elapsedSeconds);
}
private static ResultSetRewindable unique(ResultSetRewindable results) { // VERY crude. Utilises the fact that bindings have value equality. List<Binding> x = new ArrayList<Binding>(); Set<Binding> seen = new HashSet<Binding>(); for (; results.hasNext(); ) { Binding b = results.nextBinding(); if (seen.contains(b)) continue; seen.add(b); x.add(b); } QueryIterator qIter = new QueryIterPlainWrapper(x.iterator()); ResultSet rs = new ResultSetStream(results.getResultVars(), ModelFactory.createDefaultModel(), qIter); return ResultSetFactory.makeRewindable(rs); }
private void LoadQueriesBtnMouseClicked( java.awt.event.MouseEvent evt) { // GEN-FIRST:event_LoadQueriesBtnMouseClicked // Algorithm /* * Goto queries' directory and load all queryfile.query to Combobox * Change local directory to repository diretory * List all file there * Try to load all file and add to combobox * * */ String sQueryRepositoryPath = "/home/natuan/Documents/OntologyMysql/QueriesRepository/"; File dir = new File(sQueryRepositoryPath); String[] children = dir.list(); if (children == null) { // Either dir does not exist or is not a directory } else { System.out.println("list files"); for (int i = 0; i < children.length; i++) { // Get filename of file or directory String filename = children[i]; String sContent = ReadWholeFileToString(sQueryRepositoryPath + filename); queryList.add(sContent); QueriesCmb.addItem(filename); SparqlTxtArea.setText(sContent); // System.out.println(filename); } QueriesCmb.setSelectedIndex(children.length - 1); } } // GEN-LAST:event_LoadQueriesBtnMouseClicked
/**
 * Starts the harvester for queries and/or URLs.
 *
 * <p>Unless the harvester is already closed, one harvesting pass is performed: the SPARQL
 * endpoint is harvested when there are queries configured, then the RDF dumps are harvested,
 * and the harvester is marked as closed.
 *
 * @return always {@code true}
 */
public boolean runIndexAll() {
  logger.info(
      "Starting RDF harvester: endpoint [{}], queries [{}],"
          + "URIs [{}], index name [{}], typeName [{}]",
      rdfEndpoint,
      rdfQueries,
      rdfUris,
      indexName,
      typeName);

  while (!this.closed) {
    // Harvest from a SPARQL endpoint
    if (!rdfQueries.isEmpty()) {
      harvestFromEndpoint();
    }

    // Harvest from RDF dumps
    harvestFromDumps();

    // One pass is enough; this ends the loop.
    closed = true;
  }

  logger.info(
      "Ended harvest for endpoint [{}], queries [{}]," + "URIs [{}], index name {}, type name {}",
      rdfEndpoint,
      rdfQueries,
      rdfUris,
      indexName,
      typeName);
  return true;
}
/**
 * Builds a rewindable result set in which every value of {@code resultsActual} is replaced by a
 * plain literal holding its string form.
 *
 * <p>Unbound variables become the empty string, blank nodes become {@code "_:"} followed by
 * their label, and every other node is converted with {@code NodeFunctions.str}. The input is
 * consumed.
 *
 * @param resultsActual the result set to convert
 * @return a rewindable result set with the same variables and string-literal values
 */
private ResultSetRewindable convertToStrings(ResultSetRewindable resultsActual) {
  List<String> varNames = resultsActual.getResultVars();
  List<Binding> converted = new ArrayList<Binding>();

  while (resultsActual.hasNext()) {
    Binding source = resultsActual.nextBinding();
    BindingMap target = BindingFactory.create();

    for (String varName : varNames) {
      Var variable = Var.alloc(varName);
      Node value = source.get(variable);
      String text =
          (value == null)
              ? ""
              : value.isBlank() ? "_:" + value.getBlankNodeLabel() : NodeFunctions.str(value);
      target.add(variable, NodeFactory.createLiteral(text));
    }
    converted.add(target);
  }

  QueryIterPlainWrapper rows = new QueryIterPlainWrapper(converted.iterator());
  ResultSet stringified = new ResultSetStream(varNames, null, rows);
  return ResultSetFactory.makeRewindable(stringified);
}
private void QueriesCmbItemStateChanged( java.awt.event.ItemEvent evt) { // GEN-FIRST:event_QueriesCmbItemStateChanged // TODO add your handling code here: int iQueryNumber = QueriesCmb.getSelectedIndex(); SparqlTxtArea.setText(queryList.get(iQueryNumber).toString()); } // GEN-LAST:event_QueriesCmbItemStateChanged
/**
 * Builds a CONSTRUCT query returning all triples whose subject is one of {@code uris}.
 *
 * <p>When {@link #uriDescriptionList} is empty the query is just that plain selection. Otherwise
 * the base pattern drops every triple whose object carries one of the description properties,
 * and one UNION branch per description property adds those triples back with the object
 * replaced by {@code str(?label)}, so labels are resolved directly by the SPARQL endpoint.
 *
 * @param uris URIs for queried resources
 * @return a CONSTRUCT query string
 */
private String getSyncQueryStr(Iterable<String> uris) {
  /* Render the subjects as a SPARQL expression list: (<a>, <b>, ...) */
  StringBuilder subjectList = new StringBuilder("(");
  boolean firstUri = true;
  for (String uri : uris) {
    if (!firstUri) {
      subjectList.append(", ");
    }
    subjectList.append("<").append(uri).append(">");
    firstUri = false;
  }
  String uriSet = subjectList.append(")").toString();

  /* Base triples having any element from uris as subject */
  StringBuilder query = new StringBuilder();
  query.append("CONSTRUCT { ?s ?p ?o } WHERE {");
  query.append("{?s ?p ?o");
  query.append(" . FILTER (?s in ").append(uriSet).append(" )");

  /* Without description properties there is no label resolution to perform */
  if (uriDescriptionList.isEmpty()) {
    return query.append("}}").toString();
  }

  /* Exclude triples whose object has one of the description properties */
  final String excludeTemplate = " . OPTIONAL { ?o <%s> ?o%d } " + " . FILTER(!BOUND(?o%d))";
  int position = 0;
  for (String prop : uriDescriptionList) {
    position++;
    query.append(String.format(excludeTemplate, prop, position, position));
  }
  query.append("}");

  /* UNION branches cannot start with a sub-select, so each branch opens with a
   * clause that is always true. */
  String redundantClause =
      "<http://www.w3.org/2000/01/rdf-schema#Class> "
          + "a <http://www.w3.org/2000/01/rdf-schema#Class>";

  /* Every branch resolves ?o as str(?label), where ?res <prop> ?label.
   *
   * str(?label) drops the language tag of the term, so the document is indexed with a
   * language present only in its top-level properties.
   *
   * Some Virtuoso versions do not allow BIND, hence the sub-select that binds ?o to
   * str(?label); the sub-select in turn needs the prior, always-true clause.
   *
   * Everything up to the property URI is identical for all branches, so it is built once. */
  String branchHead =
      " UNION "
          + "{ "
          + redundantClause
          + " . "
          + "{ SELECT ?s ?p (str(?label) as ?o) { "
          + " ?s ?p ?res"
          + " . FILTER (?s in "
          + uriSet
          + ")"
          + " . ?res <";
  String branchTail = "> ?label }}}";

  for (String prop : uriDescriptionList) {
    query.append(branchHead).append(prop).append(branchTail);
  }

  return query.append("}").toString();
}
/**
 * Sets the {@link Harvester}'s {@link #rdfPropList} parameter.
 *
 * <p>The given list is copied. An empty list leaves the current value untouched.
 *
 * @param list - a list of properties names that are either required in the object description, or
 *     undesired, depending on its {@link #isWhitePropList}
 * @return the same {@link Harvester} with the {@link #rdfPropList} parameter set
 */
public Harvester rdfPropList(List<String> list) {
  if (list.isEmpty()) {
    // Nothing to apply: keep whatever is configured already.
    return this;
  }
  rdfPropList = new ArrayList<String>(list);
  return this;
}