private void readKeyValue(IFrameTupleReference tuple) throws HyracksDataException { FrameTupleReference ftr = (FrameTupleReference) tuple; IFrameTupleAccessor fta = ftr.getFrameTupleAccessor(); ByteBuffer buffer = fta.getBuffer(); int tIndex = ftr.getTupleIndex(); int keyStart = fta.getFieldSlotsLength() + fta.getTupleStartOffset(tIndex) + fta.getFieldStartOffset(tIndex, 0); int valueStart = fta.getFieldSlotsLength() + fta.getTupleStartOffset(tIndex) + fta.getFieldStartOffset(tIndex, 1); keyInputStream.setByteBuffer(buffer, keyStart); valueInputStream.setByteBuffer(buffer, valueStart); try { // read key if necessary if (!skipKey) { key.readFields(keyInput); } // read value value.readFields(valueInput); } catch (Exception e) { throw new HyracksDataException(e); } }
public void map( WritableComparable key, Writable value, OutputCollector collector, Reporter reporter) throws IOException { // convert on the fly from old formats with UTF8 keys if (key instanceof UTF8) { newKey.set(key.toString()); key = newKey; } collector.collect(key, new ObjectWritable(value)); }
/**
 * Initializes the script engine for this task: looks up the "JavaScript" engine, resolves the
 * map output key/value instances from the job configuration, evaluates the configured scripts
 * and exposes the output instances to them as {@code mapOutputKey} and {@code mapOutputValue}.
 *
 * @param context the task context whose configuration supplies the type names and script paths
 * @throws IOException if no "JavaScript" engine is available, or wrapping a
 *     {@link ScriptException} raised while evaluating the scripts
 * @throws InterruptedException propagated from {@code super.setup}
 */
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    scriptEngine = scriptEngineManager.getEngineByName("JavaScript");
    if (scriptEngine == null) {
        // getEngineByName returns null when no engine matches; fail with a clear message
        // instead of a NullPointerException on the first use below.
        throw new IOException("No script engine available for name \"JavaScript\"");
    }

    try {
        Configuration configuration = context.getConfiguration();
        mapOutputKey = MrUtils.getWritableComparableType("js.map.output.key.type", configuration);
        mapOutputValue =
                MrUtils.getWritableComparableType("js.map.output.value.type", configuration);

        scriptEngine.eval(
                MrUtils.getScripts(
                        context.getConfiguration(),
                        MrUtils.getPathFilter("js.map.filename", configuration)));

        scriptEngine.put("mapOutputKey", mapOutputKey);
        scriptEngine.put("mapOutputValue", mapOutputValue);

        OUTPUT_KEY_CLASS = mapOutputKey.getClass();
        OUTPUT_VALUE_CLASS = mapOutputValue.getClass();
    } catch (ScriptException se) {
        // Surface script failures as IOException, carrying the script's own stack trace.
        IOException ioe = new IOException(se);
        ioe.setStackTrace(se.getStackTrace());
        throw ioe;
    }
}
/**
 * Returns the next byte of the text rendering of the underlying records.
 *
 * <p>Each record is rendered as {@code key.toString()}, a tab, {@code val.toString()} and a
 * newline. When the current rendering is exhausted (or nothing has been buffered yet) the
 * next record is fetched from {@code r} and rendered before reading continues.
 *
 * <p>Text is encoded as UTF-8 explicitly so the produced bytes do not depend on the
 * platform's default charset.
 *
 * @return the next byte as returned by {@code inbuf.read()}, or -1 once {@code r} has no
 *     more records
 * @throws IOException if fetching the next record or buffering it fails
 */
public int read() throws IOException {
    int ret;
    if (null == inbuf || -1 == (ret = inbuf.read())) {
        if (!r.next(key, val)) {
            return -1;
        }
        byte[] tmp = key.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        outbuf.write(tmp, 0, tmp.length);
        outbuf.write('\t');
        tmp = val.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        outbuf.write(tmp, 0, tmp.length);
        outbuf.write('\n');
        // Hand the rendered record to the read buffer, then clear the staging buffer.
        inbuf.reset(outbuf.getData(), outbuf.getLength());
        outbuf.reset();
        ret = inbuf.read();
    }
    return ret;
}
public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException { // convert on the fly from the old format if (key instanceof UTF8) { newKey.set(key.toString()); key = newKey; } if (filters != null) { try { if (filters.filter(((Text) key).toString()) == null) { return; } } catch (Exception e) { if (LOG.isWarnEnabled()) { LOG.warn("Cannot filter key " + key + ": " + e.getMessage()); } } } output.collect(key, value); }
public void reduce( WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { StringBuffer dump = new StringBuffer(); dump.append("\nRecno:: ").append(recNo++).append("\n"); dump.append("URL:: " + key.toString() + "\n"); while (values.hasNext()) { Object value = ((ObjectWritable) values.next()).get(); // unwrap if (value instanceof CrawlDatum) { dump.append("\nCrawlDatum::\n").append(((CrawlDatum) value).toString()); } else if (value instanceof Content) { dump.append("\nContent::\n").append(((Content) value).toString()); } else if (value instanceof ParseData) { dump.append("\nParseData::\n").append(((ParseData) value).toString()); } else if (value instanceof ParseText) { dump.append("\nParseText::\n").append(((ParseText) value).toString()); } else if (LOG.isWarnEnabled()) { LOG.warn("Unrecognized type: " + value.getClass()); } } output.collect(key, new ObjectWritable(dump.toString())); }
/**
 * Compares two keys by their string representations, delegating to {@code compareStr}.
 *
 * @param a the first key
 * @param b the second key
 * @return whatever {@code compareStr} returns for {@code a.toString()} and {@code b.toString()}
 */
public int compare(WritableComparable a, WritableComparable b) {
    return compareStr(a.toString(), b.toString());
}
@SuppressWarnings("unchecked") public void map( WritableComparable key, Writable value, OutputCollector<IntWritable, RecordStatsWritable> output, Reporter reporter) throws IOException { // Set up rawKey and rawValue on the first call to 'map' if (recordId == -1) { rawKey = createRaw(key.getClass()); rawValue = createRaw(value.getClass()); } ++recordId; if (this.key == sortOutput) { // Check if keys are 'sorted' if this // record is from sort's output if (prevKey == null) { prevKey = key; keyClass = prevKey.getClass(); } else { // Sanity check if (keyClass != key.getClass()) { throw new IOException( "Type mismatch in key: expected " + keyClass.getName() + ", recieved " + key.getClass().getName()); } // Check if they were sorted correctly if (prevKey.compareTo(key) > 0) { throw new IOException( "The 'map-reduce' framework wrongly" + " classifed (" + prevKey + ") > (" + key + ") " + "for record# " + recordId); } prevKey = key; } // Check if the sorted output is 'partitioned' right int keyPartition = partitioner.getPartition(key, value, noSortReducers); if (partition != keyPartition) { throw new IOException( "Partitions do not match for record# " + recordId + " ! - '" + partition + "' v/s '" + keyPartition + "'"); } } // Construct the record-stats and output (this.key, record-stats) byte[] keyBytes = rawKey.getRawBytes(key); int keyBytesLen = rawKey.getRawBytesLength(key); byte[] valueBytes = rawValue.getRawBytes(value); int valueBytesLen = rawValue.getRawBytesLength(value); int keyValueChecksum = (WritableComparator.hashBytes(keyBytes, keyBytesLen) ^ WritableComparator.hashBytes(valueBytes, valueBytesLen)); output.collect( this.key, new RecordStatsWritable((keyBytesLen + valueBytesLen), 1, keyValueChecksum)); }