/**
 * Turns one raw line into a single-field record.
 *
 * <p>In ASCII mode the bytes are handed straight to the reusable string value.
 * Otherwise they are decoded with this instance's decoder through a cached
 * {@link ByteBuffer} view, which is re-created only when the backing array changes.
 *
 * @param target   record that is cleared and then receives the line at field {@code this.pos}
 * @param bytes    array holding the line
 * @param offset   index of the first byte of the line within {@code bytes}
 * @param numBytes number of bytes that make up the line
 * @return {@code true} if the record was populated; {@code false} if the decoder
 *         rejected the bytes (the failure is logged and {@code target} is left untouched)
 */
public boolean readRecord(Record target, byte[] bytes, int offset, int numBytes) {
    final StringValue line = this.theString;

    if (!this.ascii) {
        ByteBuffer view = this.byteWrapper;
        // Re-wrap only when a different array is handed in; otherwise reuse the cached view.
        if (view.array() != bytes) {
            view = ByteBuffer.wrap(bytes);
            this.byteWrapper = view;
        }
        // Limit must be set before position so the position never exceeds the limit.
        view.limit(offset + numBytes);
        view.position(offset);

        final CharBuffer decoded;
        try {
            decoded = this.decoder.decode(view);
        } catch (CharacterCodingException e) {
            // Log only the offending slice rather than the whole buffer.
            final byte[] offending = new byte[numBytes];
            System.arraycopy(bytes, offset, offending, 0, numBytes);
            LOG.warn("Line could not be encoded: " + Arrays.toString(offending), e);
            return false;
        }
        line.setValue(decoded);
    } else {
        line.setValueAscii(bytes, offset, numBytes);
    }

    target.clear();
    target.setField(this.pos, line);
    return true;
}
/**
 * Compares two serialized values by reading each into a reusable holder and
 * delegating to the holder's {@code compareTo}.
 *
 * <p>Both holders are overwritten on every call.
 *
 * @param firstSource  input from which the first value is read into {@code holder1}
 * @param secondSource input from which the second value is read into {@code holder2}
 * @return the {@code compareTo} result when {@code ascendingComparison} is set,
 *         otherwise a value of the opposite sign
 * @throws IOException if reading from either source fails
 */
@Override
public int compare(DataInputView firstSource, DataInputView secondSource) throws IOException {
    holder1.read(firstSource);
    holder2.read(secondSource);

    final int comp = holder1.compareTo(holder2);
    if (ascendingComparison) {
        return comp;
    }
    // -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE, which would keep the
    // sign instead of flipping it; map that single value to MAX_VALUE explicitly.
    return comp == Integer.MIN_VALUE ? Integer.MAX_VALUE : -comp;
}
@Override public Record readRecord(Record target, byte[] bytes, int offset, int numBytes) { String lineStr = new String(bytes, offset, numBytes); // replace reduce whitespaces and trim lineStr = lineStr.replaceAll("\\s+", " ").trim(); // build whitespace tokenizer StringTokenizer st = new StringTokenizer(lineStr, " "); // line must have at least three elements if (st.countTokens() < 3) { return null; } String rdfSubj = st.nextToken(); String rdfPred = st.nextToken(); String rdfObj = st.nextToken(); // we only want foaf:knows predicates if (!rdfPred.equals("<http://xmlns.com/foaf/0.1/knows>")) { return null; } // build node pair from subject and object fromNode.setValue(rdfSubj); toNode.setValue(rdfObj); target.setField(0, fromNode); target.setField(1, toNode); target.setField(2, pathLength); target.setField(3, hopCnt); target.setField(4, hopList); return target; }
@Override public void join(Record rec1, Record rec2, Collector<Record> out) throws Exception { // rec1 has matching start, rec2 matching end // Therefore, rec2's end node and rec1's start node are identical // First half of new path will be rec2, second half will be rec1 // Get from-node and to-node of new path final StringValue fromNode = rec2.getField(0, StringValue.class); final StringValue toNode = rec1.getField(1, StringValue.class); // Check whether from-node = to-node to prevent circles! if (fromNode.equals(toNode)) { return; } // Create new path outputRecord.setField(0, fromNode); outputRecord.setField(1, toNode); // Compute length of new path length.setValue( rec1.getField(2, IntValue.class).getValue() + rec2.getField(2, IntValue.class).getValue()); outputRecord.setField(2, length); // compute hop count int hops = rec1.getField(3, IntValue.class).getValue() + 1 + rec2.getField(3, IntValue.class).getValue(); hopCnt.setValue(hops); outputRecord.setField(3, hopCnt); // Concatenate hops lists and insert matching node StringBuilder sb = new StringBuilder(); // first path sb.append(rec2.getField(4, StringValue.class).getValue()); sb.append(" "); // common node sb.append(rec1.getField(0, StringValue.class).getValue()); // second path sb.append(" "); sb.append(rec1.getField(4, StringValue.class).getValue()); hopList.setValue(sb.toString().trim()); outputRecord.setField(4, hopList); out.collect(outputRecord); }
/**
 * Compares two serialized strings by reading one string from each source and
 * applying {@code compareTo}.
 *
 * @param firstSource  input supplying the left-hand string
 * @param secondSource input supplying the right-hand string
 * @return the result of comparing the first string to the second
 * @throws IOException if reading from either source fails
 */
@Override
public int compare(DataInputView firstSource, DataInputView secondSource) throws IOException {
    // Read in the same order as the arguments: first source, then second.
    final String left = StringValue.readString(firstSource);
    final String right = StringValue.readString(secondSource);
    return left.compareTo(right);
}