Пример #1
0
 /**
  * Builds the self-configuration parameters from the property difference between two models.
  *
  * <p>Each property returned by {@code getPropertyDifference(source, target)} (per the original
  * description: properties found in target which are not in source) is registered twice under a
  * running 1-based number: once under {@code INPUT_PROPERTY + n} and once under
  * {@code OUTPUT_PROPERTY + n}, both mapped to the property's string form.
  *
  * @param source the source model
  * @param target the target model
  * @return map of (key, value) pairs of self configured parameters, or {@code null} when no
  *     parameter was produced
  * @author sherif
  */
 public Map<String, String> selfConfig(Model source, Model target) {
   Map<String, String> config = new HashMap<String, String>();
   int index = 0;
   for (Property missing : getPropertyDifference(source, target)) {
     index++;
     String propertyName = missing.toString();
     config.put(INPUT_PROPERTY + index, propertyName);
     config.put(OUTPUT_PROPERTY + index, propertyName);
   }
   // Callers receive null (not an empty map) when nothing was configured.
   return config.isEmpty() ? null : config;
 }
  /**
   * Index all the resources in a Jena Model to ES.
   *
   * <p>Works in two passes. The first pass walks every statement and keeps the predicates that
   * pass the property filter: everything when {@code rdfPropList} is empty, otherwise the
   * predicates selected by the white/black list, plus any predicate that has an entry in {@code
   * normalizeProp}. The second pass builds one index request per subject (document id is the
   * subject's string form) and executes the bulk every {@code EEASettings.DEFAULT_BULK_SIZE}
   * requests, followed by a final bulk for whatever is left.
   *
   * @param model the model to index
   * @param bulkRequest a BulkRequestBuilder; it receives the first batch and is replaced locally
   *     by a fresh builder after each executed batch
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   */
  private void addModelToES(Model model, BulkRequestBuilder bulkRequest, boolean getPropLabel) {
    final long startedAt = System.currentTimeMillis();
    Set<Property> selectedProps = new HashSet<Property>();

    // Pass 1: decide which predicates get indexed.
    for (StmtIterator statements = model.listStatements(); statements.hasNext(); ) {
      Property predicate = statements.nextStatement().getPredicate();
      String predicateName = predicate.toString();

      // A white list keeps listed predicates, a black list keeps unlisted ones.
      boolean selected =
          rdfPropList.isEmpty()
              || isWhitePropList == rdfPropList.contains(predicateName)
              || normalizeProp.containsKey(predicateName);
      if (selected) {
        selectedProps.add(predicate);
      }
    }

    // Pass 2: one index request per subject, flushed in batches.
    long indexedCount = 0;
    for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
      Resource subject = subjects.nextResource();
      Map<String, ArrayList<String>> document =
          getJsonMap(subject, selectedProps, model, getPropLabel);

      bulkRequest.add(
          client
              .prepareIndex(indexName, typeName, subject.toString())
              .setSource(mapToString(document)));
      indexedCount++;

      if (indexedCount % EEASettings.DEFAULT_BULK_SIZE != 0) {
        continue; // batch not full yet
      }

      BulkResponse batchResponse = bulkRequest.execute().actionGet();
      // Start a fresh builder so already-sent requests are not executed again.
      bulkRequest = client.prepareBulk();
      if (batchResponse.hasFailures()) {
        processBulkResponseFailure(batchResponse);
      }
    }

    // Send the last, partially filled batch (if any).
    if (bulkRequest.numberOfActions() > 0) {
      BulkResponse lastResponse = bulkRequest.execute().actionGet();
      if (lastResponse.hasFailures()) {
        processBulkResponseFailure(lastResponse);
      }
    }

    double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0;
    logger.info(
        "Indexed {} documents on {}/{} in {} seconds",
        indexedCount,
        indexName,
        typeName,
        elapsedSeconds);
  }
  /**
   * Prompts for a URI on standard input, reads the RDF found there into an in-memory Jena model
   * and prints every statement of the model to standard output.
   *
   * <p>Each statement is printed on one line as {@code subject -> predicate -> object}; objects
   * that are not resources (i.e. literals) are wrapped in double quotes.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    Scanner input = new Scanner(System.in);

    System.out.print("Enter URI: ");
    String userIn = input.nextLine();

    // create an empty Model and load the RDF located at the given URI into it
    Model model = ModelFactory.createDefaultModel();
    model.read(userIn);

    // list the statements in the Model
    StmtIterator iter = model.listStatements();

    System.out.println();

    // print out the subject, predicate and object of each statement
    while (iter.hasNext()) {
      Statement stmt = iter.nextStatement();
      Resource subject = stmt.getSubject();
      Property predicate = stmt.getPredicate();
      RDFNode object = stmt.getObject();

      System.out.print(subject.toString());
      System.out.print(" -> " + predicate.toString() + " -> ");
      if (object instanceof Resource) {
        System.out.print(object.toString() + "\n");
      } else {
        // object is a literal
        System.out.print(" \"" + object.toString() + "\"\n");
      }
    }
  }
Пример #4
0
  /**
   * Dereferences a URI-typed node and collects the values of the targeted predicates for it.
   *
   * <p>The URI of {@code p} is opened through a {@link URLConnection} requesting {@code
   * application/rdf+xml} with {@code Accept-Language: en}, and the response is read into a local
   * model. For every property in {@code inputProperties}, the objects of all statements having the
   * URI as subject and that property as predicate are collected. Literal objects are kept only
   * when their language tag is {@code en} (any case) or empty; non-literal objects are always
   * kept.
   *
   * <p>Demo mode: when {@code demo} and {@code useCache} are both set and the file {@code
   * resourceFocusedInfo.ser} exists, its content is returned instead of dereferencing the URI.
   * When {@code demo} is set, the result of a dereference is written to that file.
   *
   * <p>Failures while reading the cache, dereferencing, or writing the cache are printed with
   * {@code printStackTrace()} and do not propagate; whatever was collected so far is returned.
   *
   * @param p the URI-typed node to be dereferenced; must be convertible with {@code asResource()}
   * @return map from each targeted predicate to the list of values found for it (possibly empty
   *     lists, or an empty map if dereferencing failed before any predicate was processed)
   */
  @SuppressWarnings("unchecked")
  private static HashMap<Property, List<RDFNode>> getURIInfo(RDFNode p) {
    String uri = p.asResource().getURI();
    // to store each predicate and its values
    HashMap<Property, List<RDFNode>> resourceFocusedInfo = new HashMap<Property, List<RDFNode>>();

    if (demo && useCache) { // Deserialize the results if they exist (for demo purposes)
      // NOTE(review): the cache file is not keyed by URI, so the same cached result is returned
      // for every node. Looks intentional for the demo; confirm.
      try {
        File file = new File("resourceFocusedInfo.ser");
        if (file.exists()) {
          HashMap<String, List<String>> ser;
          // NOTE(review): Java-native deserialization; only safe while this file is trusted.
          FileInputStream fileIn = new FileInputStream(file);
          try {
            ObjectInputStream in = new ObjectInputStream(fileIn);
            ser = (HashMap<String, List<String>>) in.readObject();
          } finally {
            // Closing the underlying stream releases the file handle even if reading failed.
            fileIn.close();
          }
          // convert every object back from string (every cached value becomes a resource)
          for (String prop : ser.keySet()) {
            List<RDFNode> nodes = new ArrayList<RDFNode>();
            for (String n : ser.get(prop)) {
              nodes.add(ResourceFactory.createResource(n));
            }
            resourceFocusedInfo.put(ResourceFactory.createProperty(prop), nodes);
          }
          return resourceFocusedInfo;
        }
      } catch (Exception e) {
        // Best effort: an unreadable cache falls through to a live dereference.
        e.printStackTrace();
      }
    }

    // define local model to have the data of the URI and extract the focused info from it
    try {
      URLConnection conn = new URL(uri).openConnection();
      conn.setRequestProperty("Accept", "application/rdf+xml");
      conn.setRequestProperty("Accept-Language", "en");
      Model model = ModelFactory.createDefaultModel();
      InputStream in = conn.getInputStream();
      try {
        model.read(in, null);
      } finally {
        in.close();
      }
      for (Property inputProperty : inputProperties) {
        List<RDFNode> values = new ArrayList<RDFNode>(); // one fresh list per predicate
        for (Statement st :
            model.listStatements(model.getResource(uri), inputProperty, (RDFNode) null).toList()) {
          RDFNode value = st.getObject();
          if (value.isLiteral()) {
            // Keep only English or language-neutral literals.
            String lang = value.asLiteral().getLanguage();
            if (lang.equalsIgnoreCase("en") || lang.isEmpty()) {
              values.add(value);
            }
          } else {
            values.add(value);
          }
        }
        resourceFocusedInfo.put(inputProperty, values);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    if (demo) { // serialize the output (for demo purposes)
      try {
        // convert to serializable Strings
        HashMap<String, List<String>> ser = new HashMap<String, List<String>>();
        for (Property prop : resourceFocusedInfo.keySet()) {
          List<String> l = new ArrayList<String>();
          for (RDFNode n : resourceFocusedInfo.get(prop)) {
            l.add(n.toString());
          }
          ser.put(prop.toString(), l);
        }
        FileOutputStream fileOut = new FileOutputStream("resourceFocusedInfo.ser");
        try {
          ObjectOutputStream out = new ObjectOutputStream(fileOut);
          out.writeObject(ser);
          out.flush(); // push buffered object data to the file before it is closed
        } finally {
          fileOut.close();
        }
      } catch (Exception e2) {
        e2.printStackTrace();
      }
    }
    return resourceFocusedInfo;
  }
  /**
   * Get JSON map for a given resource by applying the river settings.
   *
   * <p>For every property in {@code properties} the objects of {@code rs} are converted with
   * {@code getStringForResult}, filtered through {@code whiteMap}/{@code blackMap}, optionally
   * replaced through {@code normalizeObj}, and stored under the property name (or its {@code
   * normalizeProp} replacement). Values that end up under the same key are merged, regardless of
   * the order in which the properties are visited. Properties without any remaining value are not
   * added.
   *
   * <p>Additionally: when {@code addUriForResource} is set the quoted resource URI is stored under
   * the {@code rdf:about} key; when {@code addLanguage} is set a {@code language} entry is added
   * from the language tags seen on literals (or from {@code language} if none was seen and it is
   * not empty); and every key of {@code normalizeMissing} that is still absent is filled with its
   * quoted default value.
   *
   * @param rs resource being processed
   * @param properties properties to be indexed
   * @param model model returned by the indexing query
   * @param getPropLabel if set to true all URI property values will be indexed as their label. The
   *     label is taken as the value of one of the properties set in {@link #uriDescriptionList}.
   * @return map of properties to be indexed for res
   */
  private Map<String, ArrayList<String>> getJsonMap(
      Resource rs, Set<Property> properties, Model model, boolean getPropLabel) {
    Map<String, ArrayList<String>> jsonMap = new HashMap<String, ArrayList<String>>();

    if (addUriForResource) {
      ArrayList<String> about = new ArrayList<String>();
      about.add("\"" + rs.toString() + "\"");
      jsonMap.put("http://www.w3.org/1999/02/22-rdf-syntax-ns#about", about);
    }

    Set<String> rdfLanguages = new HashSet<String>();

    for (Property prop : properties) {
      NodeIterator niter = model.listObjectsOfProperty(rs, prop);
      String property = prop.toString();
      ArrayList<String> results = new ArrayList<String>();

      while (niter.hasNext()) {
        RDFNode node = niter.next();
        String currValue = getStringForResult(node, getPropLabel);
        if (addLanguage && node.isLiteral()) {
          String lang = node.asLiteral().getLanguage();
          if (!lang.isEmpty()) {
            rdfLanguages.add("\"" + lang + "\"");
          }
        }

        // Strip the first and last character before looking the value up in the maps
        // (assumes getStringForResult wraps values in quotes - TODO confirm).
        String shortValue = currValue;
        int currLen = currValue.length();
        if (currLen > 1) {
          shortValue = currValue.substring(1, currLen - 1);
        }

        // Skip the value when the property has a white list that does not contain it,
        // or a black list that does contain it.
        boolean whiteMapCond =
            whiteMap.containsKey(property) && !whiteMap.get(property).contains(shortValue);
        boolean blackMapCond =
            blackMap.containsKey(property) && blackMap.get(property).contains(shortValue);
        if (whiteMapCond || blackMapCond) {
          continue;
        }

        if (normalizeObj.containsKey(shortValue)) {
          results.add("\"" + normalizeObj.get(shortValue) + "\"");
        } else {
          results.add(currValue);
        }
      }

      // Do not index empty properties
      if (results.isEmpty()) {
        continue;
      }

      if (normalizeProp.containsKey(property)) {
        property = normalizeProp.get(property);
      }

      // Always merge into an existing entry. Overwriting here would silently drop values that a
      // previously visited property contributed under the same (normalized) key, and whether
      // that happens would depend on the iteration order of the property set.
      ArrayList<String> existing = jsonMap.get(property);
      if (existing == null) {
        jsonMap.put(property, results);
      } else {
        existing.addAll(results);
      }
    }

    if (addLanguage) {
      // Fall back to the configured language when no literal carried a language tag.
      if (rdfLanguages.isEmpty() && !language.isEmpty()) {
        rdfLanguages.add(language);
      }
      if (!rdfLanguages.isEmpty()) {
        jsonMap.put("language", new ArrayList<String>(rdfLanguages));
      }
    }

    // Fill in defaults for keys that did not receive any value.
    for (Map.Entry<String, String> it : normalizeMissing.entrySet()) {
      if (!jsonMap.containsKey(it.getKey())) {
        ArrayList<String> res = new ArrayList<String>();
        res.add("\"" + it.getValue() + "\"");
        jsonMap.put(it.getKey(), res);
      }
    }

    return jsonMap;
  }