@Override
public Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> defineRange(
        RyaURI subject, RyaURI predicate, RyaType object, RyaURI context,
        RdfCloudTripleStoreConfiguration conf) throws IOException {
    try {
        // Supported patterns (ng = named graph/context):
        // po(ng)
        // po_r(s)(ng)
        // p(ng)
        // p_r(o)(ng)
        // r(p)(ng)
        if (!handles(subject, predicate, object, context)) return null;

        RyaContext ryaContext = RyaContext.getInstance();
        MessageDigest md = MessageDigest.getInstance("MD5");

        RdfCloudTripleStoreConstants.TABLE_LAYOUT table_layout =
                RdfCloudTripleStoreConstants.TABLE_LAYOUT.PO;
        byte[] start, stop;
        if (object != null) {
            if (object instanceof RyaRange) {
                // p_r(o): fixed predicate, ranged object
                RyaRange rv = (RyaRange) object;
                rv = ryaContext.transformRange(rv);
                byte[] objStartBytes = ryaContext.serializeType(rv.getStart())[0];
                byte[] objEndBytes = ryaContext.serializeType(rv.getStop())[0];
                byte[] predBytes = predicate.getData().getBytes();
                byte[] predHash = md.digest(predBytes);
                start = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES, objStartBytes);
                stop = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES, objEndBytes,
                        DELIM_BYTES, LAST_BYTES);
            } else {
                if (subject != null && subject instanceof RyaRange) {
                    // po_r(s): fixed predicate and object, ranged subject
                    RyaRange ru = (RyaRange) subject;
                    ru = ryaContext.transformRange(ru);
                    byte[] subjStartBytes = ru.getStart().getData().getBytes();
                    byte[] subjStopBytes = ru.getStop().getData().getBytes();
                    byte[] predBytes = predicate.getData().getBytes();
                    byte[] predHash = md.digest(predBytes);
                    byte[] objBytes = ryaContext.serializeType(object)[0];
                    start = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES, objBytes,
                            DELIM_BYTES, subjStartBytes);
                    stop = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES, objBytes,
                            DELIM_BYTES, subjStopBytes, TYPE_DELIM_BYTES, LAST_BYTES);
                } else {
                    // po: fixed predicate and object
                    // TODO: There must be a better way than creating multiple byte[]
                    byte[] predBytes = predicate.getData().getBytes();
                    byte[] predHash = md.digest(predBytes);
                    byte[] objBytes = ryaContext.serializeType(object)[0];
                    start = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES, objBytes,
                            DELIM_BYTES);
                    stop = Bytes.concat(start, LAST_BYTES);
                }
            }
        } else {
            // p: fixed predicate only
            byte[] predBytes = predicate.getData().getBytes();
            byte[] predHash = md.digest(predBytes);
            start = Bytes.concat(predHash, DELIM_BYTES, predBytes, DELIM_BYTES);
            stop = Bytes.concat(start, LAST_BYTES);
        }
        return new RdfCloudTripleStoreUtils.CustomEntry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange>(
                table_layout, new ByteRange(start, stop));
    } catch (RyaTypeResolverException e) {
        throw new IOException(e);
    } catch (NoSuchAlgorithmException e) {
        throw new IOException(e);
    }
}
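For context, a minimal usage sketch of how a caller might ask this strategy for a scan range over the PO table for a fixed predicate/object pattern. The `strategy` and `conf` references are placeholders not shown in the source above, and the literal values are illustrative only.

// Minimal sketch (assumed wiring): request a range for the pattern "?s <pred> <obj>".
RyaURI pred = new RyaURI("urn:example#worksAt");
RyaType obj = new RyaType("Example Corp"); // plain literal object

Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> range =
        strategy.defineRange(null, pred, obj, null, conf); // "po" branch: concrete object, no subject
if (range != null) {
    // range.getKey() names the PO table layout; range.getValue() carries the start/stop
    // row keys that a caller could translate into a table scan over that layout.
    ByteRange byteRange = range.getValue();
}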
/**
 * Use a HashTable to do a HashJoin.
 *
 * <p>TODO: Somehow make a more streaming way of doing this hash join. This will not support
 * large sets.
 *
 * <p>Date: 7/26/12 Time: 8:58 AM
 */
public class HashJoin<C extends RdfCloudTripleStoreConfiguration> implements Join<C> {

    private RyaContext ryaContext = RyaContext.getInstance();
    private RyaQueryEngine ryaQueryEngine;

    public HashJoin() {}

    public HashJoin(RyaQueryEngine ryaQueryEngine) {
        this.ryaQueryEngine = ryaQueryEngine;
    }

    @Override
    public CloseableIteration<RyaStatement, RyaDAOException> join(C conf, RyaURI... preds)
            throws RyaDAOException {
        ConcurrentHashMap<Map.Entry<RyaURI, RyaType>, Integer> ht =
                new ConcurrentHashMap<Map.Entry<RyaURI, RyaType>, Integer>();
        int count = 0;
        boolean first = true;
        for (RyaURI pred : preds) {
            count++;
            // query for all statements with this predicate
            CloseableIteration<RyaStatement, RyaDAOException> results =
                    ryaQueryEngine.query(new RyaStatement(null, pred, null), null);
            // add each (subject, object) pair to the hash table
            while (results.hasNext()) {
                RyaStatement next = results.next();
                RyaURI subject = next.getSubject();
                RyaType object = next.getObject();
                Map.Entry<RyaURI, RyaType> entry =
                        new RdfCloudTripleStoreUtils.CustomEntry<RyaURI, RyaType>(subject, object);
                if (!first) {
                    if (!ht.containsKey(entry)) {
                        continue; // not in join
                    }
                }
                ht.put(entry, count);
            }
            // remove entries whose count is below the current pass; they did not
            // match every predicate seen so far
            if (first) {
                first = false;
            } else {
                for (Map.Entry<Map.Entry<RyaURI, RyaType>, Integer> entry : ht.entrySet()) {
                    if (entry.getValue() < count) {
                        ht.remove(entry.getKey());
                    }
                }
            }
        }
        final Enumeration<Map.Entry<RyaURI, RyaType>> keys = ht.keys();
        return new CloseableIteration<RyaStatement, RyaDAOException>() {
            @Override
            public void close() throws RyaDAOException {}

            @Override
            public boolean hasNext() throws RyaDAOException {
                return keys.hasMoreElements();
            }

            @Override
            public RyaStatement next() throws RyaDAOException {
                Map.Entry<RyaURI, RyaType> subjObj = keys.nextElement();
                return new RyaStatement(subjObj.getKey(), null, subjObj.getValue());
            }

            @Override
            public void remove() throws RyaDAOException {
                keys.nextElement(); // skips the current element; the backing map is not modified
            }
        };
    }

    @Override
    public CloseableIteration<RyaURI, RyaDAOException> join(
            C conf, Map.Entry<RyaURI, RyaType>... predObjs) throws RyaDAOException {
        ConcurrentHashMap<RyaURI, Integer> ht = new ConcurrentHashMap<RyaURI, Integer>();
        int count = 0;
        boolean first = true;
        for (Map.Entry<RyaURI, RyaType> predObj : predObjs) {
            count++;
            RyaURI pred = predObj.getKey();
            RyaType obj = predObj.getValue();
            // query for all statements with this predicate/object pair
            CloseableIteration<RyaStatement, RyaDAOException> results =
                    ryaQueryEngine.query(new RyaStatement(null, pred, obj), null);
            // add each matching subject to the hash table
            while (results.hasNext()) {
                RyaURI subject = results.next().getSubject();
                if (!first) {
                    if (!ht.containsKey(subject)) {
                        continue; // not in join
                    }
                }
                ht.put(subject, count);
            }
            // remove subjects whose count is below the current pass; they did not
            // match every predicate/object pair seen so far
            if (first) {
                first = false;
            } else {
                for (Map.Entry<RyaURI, Integer> entry : ht.entrySet()) {
                    if (entry.getValue() < count) {
                        ht.remove(entry.getKey());
                    }
                }
            }
        }
        return new EnumerationWrapper<RyaURI, RyaDAOException>(ht.keys());
    }

    public RyaQueryEngine getRyaQueryEngine() {
        return ryaQueryEngine;
    }

    public void setRyaQueryEngine(RyaQueryEngine ryaQueryEngine) {
        this.ryaQueryEngine = ryaQueryEngine;
    }
}
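As a usage illustration, a minimal sketch of joining on two predicates with this class. It assumes an already configured RyaQueryEngine (here called engine) and a conf instance; the predicate URIs are illustrative only.

// Minimal sketch: find every (subject, object) pair that appears under both predicates.
HashJoin<RdfCloudTripleStoreConfiguration> hashJoin = new HashJoin<>(engine);
CloseableIteration<RyaStatement, RyaDAOException> joined =
        hashJoin.join(conf, new RyaURI("urn:example#authored"), new RyaURI("urn:example#edited"));
try {
    while (joined.hasNext()) {
        RyaStatement stmt = joined.next(); // predicate is null; subject/object survived every pass
        System.out.println(stmt.getSubject() + " -> " + stmt.getObject());
    }
} finally {
    joined.close();
}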