private void buildNodesObjectFile() { // spill(sdb01) ; this.sdb02 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>( getThresholdPolicy(), serializationFactory, new PairComparator()); this.sdb03 = new MultiThreadedSortedDataBag<Pair<byte[], byte[]>>( getThresholdPolicy(), serializationFactory, new PairComparator()); try { log.info("Node Table (1/3): building nodes.dat and sorting hash|id ..."); ProgressLogger monitor01 = new ProgressLogger( log, "records for node table (1/3) phase", BulkLoader.DataTickPoint, BulkLoader.superTick); monitor01.start(); String curr = null; long id = -1L; Iterator<Pair<byte[], byte[]>> iter01 = sdb01.iterator(); while (iter01.hasNext()) { Pair<byte[], byte[]> pair01 = iter01.next(); String leftIn = new String(pair01.getLeft(), "UTF-8"); String rightIn = new String(pair01.getRight(), "UTF-8"); if (!leftIn.equals(curr)) { curr = leftIn; // generate the node id Node node = tdbloader3.parse(leftIn); id = NodeLib.encodeStore(node, objects); // add to hash|id Hash hash = new Hash(SystemTDB.LenNodeHash); setHash(hash, node); sdb03.add(new Pair<byte[], byte[]>(hash.getBytes(), Bytes.packLong(id))); } // System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ; String tokens[] = rightIn.split("\\|"); String leftOut = tokens[0]; String rightOut = id + "|" + tokens[1]; // System.out.println ("> ( " + leftOut + ", " + rightOut + " )") ; Pair<byte[], byte[]> pair02 = new Pair<byte[], byte[]>(leftOut.getBytes("UTF-8"), rightOut.getBytes("UTF-8")); sdb02.add(pair02); monitor01.tick(); } ProgressLogger.print(log, monitor01); } catch (UnsupportedEncodingException e) { throw new AtlasException(e); } finally { sdb01.close(); sdb01 = null; // spill (sdb02) ; // spill (sdb03) ; } }
private void generateSortedHashNodeIdDataBag() { try { log.info("Node Table (2/3): generating input data using node ids..."); final ProgressLogger monitor02 = new ProgressLogger( log, "records for node table (2/3) phase", BulkLoader.DataTickPoint, BulkLoader.superTick); monitor02.start(); Iterator<Pair<byte[], byte[]>> iter02 = sdb02.iterator(); String curr = null; Long s = null; Long p = null; Long o = null; Long g = null; while (iter02.hasNext()) { Pair<byte[], byte[]> pair02 = iter02.next(); String leftIn = new String(pair02.getLeft(), "UTF-8"); String rightIn = new String(pair02.getRight(), "UTF-8"); // System.out.println ("< ( " + leftIn + ", " + rightIn + " )") ; if (curr == null) curr = leftIn; if (!leftIn.equals(curr)) { curr = leftIn; write(g, s, p, o); s = null; p = null; o = null; g = null; monitor02.tick(); } String tokens[] = rightIn.split("\\|"); if ("s".equals(tokens[1])) s = Long.parseLong(tokens[0]); else if ("p".equals(tokens[1])) p = Long.parseLong(tokens[0]); else if ("o".equals(tokens[1])) o = Long.parseLong(tokens[0]); else if ("g".equals(tokens[1])) g = Long.parseLong(tokens[0]); } write(g, s, p, o); // ensure we write the last triple|quad ProgressLogger.print(log, monitor02); } catch (UnsupportedEncodingException e) { throw new AtlasException(e); } finally { sdb02.close(); sdb02 = null; } }