/**
   * Computes the PO-table row range to scan for the given triple pattern.
   *
   * <p>Every row key begins with {@code MD5(predicate) DELIM predicate DELIM}. Depending on which
   * terms are supplied, the range is narrowed as follows:
   *
   * <ul>
   *   <li>{@code p}: no object; covers every row under the predicate prefix.
   *   <li>{@code p_r(o)}: object is a {@link RyaRange}; spans from the serialized range start to
   *       the serialized range stop.
   *   <li>{@code po_r(s)}: concrete object, subject is a {@link RyaRange}; spans the subject range
   *       under the predicate/object prefix.
   *   <li>{@code po}: concrete object; covers every row under the predicate/object prefix.
   * </ul>
   *
   * @param subject subject term; only consulted when it is a {@link RyaRange}
   * @param predicate predicate term whose data forms the row prefix
   * @param object object term, a {@link RyaRange}, or {@code null}
   * @param context passed to {@code handles}; otherwise unused here
   * @param conf not used by this method
   * @return the {@code PO} layout paired with the byte range, or {@code null} when {@code handles}
   *     rejects the pattern
   * @throws IOException wrapping a {@code RyaTypeResolverException} or a {@code
   *     NoSuchAlgorithmException} raised while building the range
   */
  @Override
  public Map.Entry<RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange> defineRange(
      RyaURI subject,
      RyaURI predicate,
      RyaType object,
      RyaURI context,
      RdfCloudTripleStoreConfiguration conf)
      throws IOException {
    try {
      // Supported patterns (ng = optional named graph):
      //   po(ng), po_r(s)(ng), p(ng), p_r(o)(ng), r(p)(ng)
      if (!handles(subject, predicate, object, context)) {
        return null;
      }

      final RyaContext typeContext = RyaContext.getInstance();
      final MessageDigest hasher = MessageDigest.getInstance("MD5");

      // NOTE(review): the no-arg getBytes() calls below presumably use the platform default
      // charset; confirm this matches the encoding used when the rows are written.
      final byte[] start;
      final byte[] stop;
      if (object == null) {
        // p
        final byte[] predicateBytes = predicate.getData().getBytes();
        final byte[] predicateHash = hasher.digest(predicateBytes);
        start = Bytes.concat(predicateHash, DELIM_BYTES, predicateBytes, DELIM_BYTES);
        stop = Bytes.concat(start, LAST_BYTES);
      } else if (object instanceof RyaRange) {
        // p_r(o)
        final RyaRange objectRange = typeContext.transformRange((RyaRange) object);
        final byte[] rangeStartBytes = typeContext.serializeType(objectRange.getStart())[0];
        final byte[] rangeStopBytes = typeContext.serializeType(objectRange.getStop())[0];
        final byte[] predicateBytes = predicate.getData().getBytes();
        final byte[] predicateHash = hasher.digest(predicateBytes);
        start =
            Bytes.concat(predicateHash, DELIM_BYTES, predicateBytes, DELIM_BYTES, rangeStartBytes);
        stop =
            Bytes.concat(
                predicateHash,
                DELIM_BYTES,
                predicateBytes,
                DELIM_BYTES,
                rangeStopBytes,
                DELIM_BYTES,
                LAST_BYTES);
      } else if (subject instanceof RyaRange) {
        // po_r(s) -- instanceof is already false for a null subject
        final RyaRange subjectRange = typeContext.transformRange((RyaRange) subject);
        final byte[] rangeStartBytes = subjectRange.getStart().getData().getBytes();
        final byte[] rangeStopBytes = subjectRange.getStop().getData().getBytes();
        final byte[] predicateBytes = predicate.getData().getBytes();
        final byte[] predicateHash = hasher.digest(predicateBytes);
        final byte[] objectBytes = typeContext.serializeType(object)[0];
        start =
            Bytes.concat(
                predicateHash,
                DELIM_BYTES,
                predicateBytes,
                DELIM_BYTES,
                objectBytes,
                DELIM_BYTES,
                rangeStartBytes);
        stop =
            Bytes.concat(
                predicateHash,
                DELIM_BYTES,
                predicateBytes,
                DELIM_BYTES,
                objectBytes,
                DELIM_BYTES,
                rangeStopBytes,
                TYPE_DELIM_BYTES,
                LAST_BYTES);
      } else {
        // po
        // TODO: There must be a better way than creating multiple byte[]
        final byte[] predicateBytes = predicate.getData().getBytes();
        final byte[] predicateHash = hasher.digest(predicateBytes);
        final byte[] objectBytes = typeContext.serializeType(object)[0];
        start =
            Bytes.concat(
                predicateHash, DELIM_BYTES, predicateBytes, DELIM_BYTES, objectBytes, DELIM_BYTES);
        stop = Bytes.concat(start, LAST_BYTES);
      }

      final ByteRange range = new ByteRange(start, stop);
      return new RdfCloudTripleStoreUtils.CustomEntry<
          RdfCloudTripleStoreConstants.TABLE_LAYOUT, ByteRange>(
          RdfCloudTripleStoreConstants.TABLE_LAYOUT.PO, range);
    } catch (RyaTypeResolverException e) {
      throw new IOException(e);
    } catch (NoSuchAlgorithmException e) {
      throw new IOException(e);
    }
  }
// Example #2
// 0
/**
 * Hash-table based join over the results of several queries.
 *
 * <p>Each predicate (or predicate/object pair) is queried in turn through the configured {@link
 * RyaQueryEngine}. A hash table maps every candidate key to the number of the last query that
 * produced it. After each query past the first, keys the latest query did not produce are
 * dropped, so only keys produced by every query remain.
 *
 * <p>TODO: Somehow make a more streaming way of doing this hash join. The whole candidate set is
 * held in memory, so this will not support large sets. Date: 7/26/12 Time: 8:58 AM
 */
public class HashJoin<C extends RdfCloudTripleStoreConfiguration> implements Join<C> {

  private RyaContext ryaContext = RyaContext.getInstance();
  private RyaQueryEngine ryaQueryEngine;

  /** Creates a join with no query engine; one must be set before calling {@code join}. */
  public HashJoin() {}

  /**
   * Creates a join that issues its queries through the given engine.
   *
   * @param ryaQueryEngine engine used to run the per-predicate queries
   */
  public HashJoin(RyaQueryEngine ryaQueryEngine) {
    this.ryaQueryEngine = ryaQueryEngine;
  }

  /**
   * Joins on (subject, object): returns the pairs that occur with every given predicate.
   *
   * @param conf not used by this implementation; {@code null} is passed as the second argument
   *     of each query
   * @param preds predicates to join over, queried in the given order
   * @return an iteration of statements carrying the surviving subject and object with a {@code
   *     null} predicate; empty when {@code preds} is empty
   * @throws RyaDAOException if querying, iterating, or closing a query result fails
   */
  @Override
  public CloseableIteration<RyaStatement, RyaDAOException> join(C conf, RyaURI... preds)
      throws RyaDAOException {
    ConcurrentHashMap<Map.Entry<RyaURI, RyaType>, Integer> ht =
        new ConcurrentHashMap<Map.Entry<RyaURI, RyaType>, Integer>();
    int count = 0;
    for (RyaURI pred : preds) {
      count++;
      final boolean firstQuery = count == 1;
      CloseableIteration<RyaStatement, RyaDAOException> results =
          ryaQueryEngine.query(new RyaStatement(null, pred, null), null);
      try {
        while (results.hasNext()) {
          RyaStatement next = results.next();
          Map.Entry<RyaURI, RyaType> entry =
              new RdfCloudTripleStoreUtils.CustomEntry<RyaURI, RyaType>(
                  next.getSubject(), next.getObject());
          // The first query seeds the table; later queries may only refresh existing keys.
          if (firstQuery || ht.containsKey(entry)) {
            ht.put(entry, count);
          }
        }
      } finally {
        // Release the query result even when iteration fails part-way through.
        results.close();
      }
      if (!firstQuery) {
        dropStaleKeys(ht, count);
      }
    }
    final Enumeration<Map.Entry<RyaURI, RyaType>> keys = ht.keys();
    return new CloseableIteration<RyaStatement, RyaDAOException>() {
      @Override
      public void close() throws RyaDAOException {}

      @Override
      public boolean hasNext() throws RyaDAOException {
        return keys.hasMoreElements();
      }

      @Override
      public RyaStatement next() throws RyaDAOException {
        Map.Entry<RyaURI, RyaType> subjObj = keys.nextElement();
        return new RyaStatement(subjObj.getKey(), null, subjObj.getValue());
      }

      @Override
      public void remove() throws RyaDAOException {
        // NOTE(review): this skips the next element rather than removing the last one returned;
        // kept as-is because callers may rely on it -- confirm the intended contract.
        keys.nextElement();
      }
    };
  }

  /**
   * Joins on subject: returns the subjects that occur with every given predicate/object pair.
   *
   * @param conf not used by this implementation; {@code null} is passed as the second argument
   *     of each query
   * @param predObjs predicate/object pairs to join over, queried in the given order
   * @return an iteration over the surviving subjects; empty when {@code predObjs} is empty
   * @throws RyaDAOException if querying, iterating, or closing a query result fails
   */
  @Override
  public CloseableIteration<RyaURI, RyaDAOException> join(
      C conf, Map.Entry<RyaURI, RyaType>... predObjs) throws RyaDAOException {
    ConcurrentHashMap<RyaURI, Integer> ht = new ConcurrentHashMap<RyaURI, Integer>();
    int count = 0;
    for (Map.Entry<RyaURI, RyaType> predObj : predObjs) {
      count++;
      final boolean firstQuery = count == 1;
      RyaURI pred = predObj.getKey();
      RyaType obj = predObj.getValue();
      CloseableIteration<RyaStatement, RyaDAOException> results =
          ryaQueryEngine.query(new RyaStatement(null, pred, obj), null);
      try {
        while (results.hasNext()) {
          RyaURI subject = results.next().getSubject();
          // The first query seeds the table; later queries may only refresh existing keys.
          if (firstQuery || ht.containsKey(subject)) {
            ht.put(subject, count);
          }
        }
      } finally {
        // Release the query result even when iteration fails part-way through.
        results.close();
      }
      if (!firstQuery) {
        dropStaleKeys(ht, count);
      }
    }
    return new EnumerationWrapper<RyaURI, RyaDAOException>(ht.keys());
  }

  /**
   * Removes every key whose recorded round is lower than {@code round}, i.e. keys that the most
   * recent query did not produce. Removing from a {@link ConcurrentHashMap} while iterating its
   * entry set is permitted by that class.
   */
  private static <K> void dropStaleKeys(ConcurrentHashMap<K, Integer> table, int round) {
    for (Map.Entry<K, Integer> entry : table.entrySet()) {
      if (entry.getValue() < round) {
        table.remove(entry.getKey());
      }
    }
  }

  /** Returns the engine used to run the per-predicate queries. */
  public RyaQueryEngine getRyaQueryEngine() {
    return ryaQueryEngine;
  }

  /** Sets the engine used to run the per-predicate queries. */
  public void setRyaQueryEngine(RyaQueryEngine ryaQueryEngine) {
    this.ryaQueryEngine = ryaQueryEngine;
  }
}