/**
 * Points the key and value input streams at fields 0 and 1 of the given tuple and
 * deserializes them into {@code key} and {@code value}.
 *
 * <p>The key is left untouched when {@code skipKey} is set; the value is always read.
 *
 * @param tuple tuple reference; must be a {@link FrameTupleReference}
 * @throws HyracksDataException wrapping any exception raised while reading the fields
 */
private void readKeyValue(IFrameTupleReference tuple) throws HyracksDataException {
    final FrameTupleReference frameTuple = (FrameTupleReference) tuple;
    final IFrameTupleAccessor accessor = frameTuple.getFrameTupleAccessor();
    final ByteBuffer frame = accessor.getBuffer();
    final int tupleIdx = frameTuple.getTupleIndex();

    // Both field positions share the same base: field-slot area length plus the tuple's start.
    final int fieldDataBase = accessor.getFieldSlotsLength() + accessor.getTupleStartOffset(tupleIdx);
    final int keyOffset = fieldDataBase + accessor.getFieldStartOffset(tupleIdx, 0);
    final int valueOffset = fieldDataBase + accessor.getFieldStartOffset(tupleIdx, 1);

    keyInputStream.setByteBuffer(frame, keyOffset);
    valueInputStream.setByteBuffer(frame, valueOffset);

    // keyInput / valueInput presumably wrap the two streams positioned above -- TODO confirm.
    try {
      if (!skipKey) {
        key.readFields(keyInput);
      }
      value.readFields(valueInput);
    } catch (Exception e) {
      throw new HyracksDataException(e);
    }
  }
Пример #2
0
 /**
  * Emits the record with its value wrapped in an {@code ObjectWritable}.
  *
  * <p>Keys that are instances of {@code UTF8} (old format) are converted on the fly: their
  * string form is copied into {@code newKey}, which is emitted in place of the original key.
  *
  * @throws IOException if the collector fails
  */
 public void map(
     WritableComparable key, Writable value, OutputCollector collector, Reporter reporter)
     throws IOException {
   WritableComparable outKey = key;
   if (outKey instanceof UTF8) {
     // old-format key: re-home its text in the reusable newKey instance
     newKey.set(outKey.toString());
     outKey = newKey;
   }
   ObjectWritable wrapped = new ObjectWritable(value);
   collector.collect(outKey, wrapped);
 }
Пример #3
0
 /**
  * Prepares the JavaScript engine for this task.
  *
  * <p>Looks up the "JavaScript" engine, instantiates the configured map output key/value
  * objects, evaluates the configured script(s), and exposes the key/value objects to the
  * script as {@code mapOutputKey} and {@code mapOutputValue}. The output classes are recorded
  * in {@code OUTPUT_KEY_CLASS} / {@code OUTPUT_VALUE_CLASS}.
  *
  * @param context task context supplying the job configuration
  * @throws IOException if no JavaScript engine is available, or if script evaluation fails
  *     (the {@link ScriptException} is kept as the cause)
  * @throws InterruptedException propagated from the superclass setup
  */
 protected void setup(Context context) throws IOException, InterruptedException {
   super.setup(context);
   scriptEngine = scriptEngineManager.getEngineByName("JavaScript");
   if (scriptEngine == null) {
     // getEngineByName returns null when no matching engine is installed; fail with a clear
     // message instead of a NullPointerException at the first eval/put below.
     throw new IOException("No script engine named \"JavaScript\" is available in this JVM");
   }
   try {
     Configuration configuration = context.getConfiguration();
     mapOutputKey = MrUtils.getWritableComparableType("js.map.output.key.type", configuration);
     mapOutputValue = MrUtils.getWritableComparableType("js.map.output.value.type", configuration);
     scriptEngine.eval(
         MrUtils.getScripts(
             configuration, MrUtils.getPathFilter("js.map.filename", configuration)));
     scriptEngine.put("mapOutputKey", mapOutputKey);
     scriptEngine.put("mapOutputValue", mapOutputValue);
     OUTPUT_KEY_CLASS = mapOutputKey.getClass();
     OUTPUT_VALUE_CLASS = mapOutputValue.getClass();
   } catch (ScriptException se) {
     // Surface script failures as IOException; the script's stack trace is copied over so it
     // is the one shown at the top level, and the original exception remains the cause.
     IOException ioe = new IOException(se);
     ioe.setStackTrace(se.getStackTrace());
     throw ioe;
   }
 }
Пример #4
0
 /**
  * Returns the next byte of the text rendering of the underlying records.
  *
  * <p>Bytes are served from {@code inbuf}. When it is exhausted, the next record is fetched
  * with {@code r.next(key, val)} and rendered as {@code key<TAB>value<NEWLINE>}; that
  * rendering is loaded into {@code inbuf} and its first byte is returned.
  *
  * <p>Text is encoded as UTF-8 explicitly so the produced bytes do not depend on the JVM's
  * default charset.
  *
  * @return the next byte, or -1 once {@code r.next} reports that no record is left
  * @throws IOException if reading the next record or buffering it fails
  */
 public int read() throws IOException {
   int ret;
   // NOTE(review): when inbuf is null the refill path below still calls inbuf.reset(...);
   // confirm inbuf can never actually be null here.
   if (null == inbuf || -1 == (ret = inbuf.read())) {
     if (!r.next(key, val)) {
       return -1;
     }
     byte[] tmp = key.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
     outbuf.write(tmp, 0, tmp.length);
     outbuf.write('\t');
     tmp = val.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
     outbuf.write(tmp, 0, tmp.length);
     outbuf.write('\n');
     // Hand the freshly rendered record to the read side, then clear the staging buffer.
     inbuf.reset(outbuf.getData(), outbuf.getLength());
     outbuf.reset();
     ret = inbuf.read();
   }
   return ret;
 }
Пример #5
0
 /**
  * Passes the record through unless the configured filters reject its key.
  *
  * <p>Old-format {@code UTF8} keys are first converted into {@code newKey}. When
  * {@code filters} is set, a key for which {@code filters.filter(...)} returns {@code null}
  * is dropped. If filtering throws, a warning is logged (when enabled) and the record is
  * still emitted.
  *
  * @throws IOException if the collector fails
  */
 public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
     throws IOException {
   WritableComparable outKey = key;
   if (outKey instanceof UTF8) {
     // convert on the fly from the old format
     newKey.set(outKey.toString());
     outKey = newKey;
   }

   boolean rejected = false;
   if (filters != null) {
     try {
       String filtered = filters.filter(((Text) outKey).toString());
       rejected = (filtered == null);
     } catch (Exception e) {
       // best effort: a failing filter does not drop the record
       if (LOG.isWarnEnabled()) {
         LOG.warn("Cannot filter key " + outKey + ": " + e.getMessage());
       }
     }
   }

   if (!rejected) {
     output.collect(outKey, value);
   }
 }
Пример #6
0
  /**
   * Builds a human-readable dump of everything collected for one key and emits it.
   *
   * <p>The dump starts with a running record number ({@code recNo}, incremented per call) and
   * the key, followed by one labelled section per value of type {@code CrawlDatum},
   * {@code Content}, {@code ParseData} or {@code ParseText}. Values of any other type
   * (including {@code null}) are skipped with a warning when warn logging is enabled.
   *
   * @param key the key being reduced; its string form is printed after "URL:: "
   * @param values iterator of {@code ObjectWritable} wrappers
   * @param output receives the key with the dump text wrapped in an {@code ObjectWritable}
   * @throws IOException if the collector fails
   */
  public void reduce(
      WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
      throws IOException {
    // Local, single-threaded accumulation: StringBuilder avoids StringBuffer's locking.
    StringBuilder dump = new StringBuilder();

    dump.append("\nRecno:: ").append(recNo++).append("\n");
    dump.append("URL:: ").append(key.toString()).append("\n");
    while (values.hasNext()) {
      Object value = ((ObjectWritable) values.next()).get(); // unwrap
      if (value instanceof CrawlDatum) {
        dump.append("\nCrawlDatum::\n").append(value);
      } else if (value instanceof Content) {
        dump.append("\nContent::\n").append(value);
      } else if (value instanceof ParseData) {
        dump.append("\nParseData::\n").append(value);
      } else if (value instanceof ParseText) {
        dump.append("\nParseText::\n").append(value);
      } else if (LOG.isWarnEnabled()) {
        // A null value fails every instanceof test above; do not dereference it here.
        LOG.warn("Unrecognized type: " + (value == null ? "null" : value.getClass()));
      }
    }
    output.collect(key, new ObjectWritable(dump.toString()));
  }
Пример #7
0
 /**
  * Compares two keys by their string representations.
  *
  * @return the result of {@code compareStr} applied to {@code a.toString()} and
  *     {@code b.toString()}
  */
 public int compare(WritableComparable a, WritableComparable b) {
   return compareStr(a.toString(), b.toString());
 }
Пример #8
0
      /**
       * Validates one record (when it comes from the sort's output) and emits its statistics.
       *
       * <p>On the first call the raw-byte helpers are created from the runtime classes of the
       * key and value. For records where {@code this.key == sortOutput}, the key's class must
       * match the first key's class, keys must not decrease according to {@code compareTo},
       * and the partitioner must map the key to {@code partition}. Every record then yields
       * {@code (this.key, RecordStatsWritable(byteCount, 1, checksum))}, where the checksum is
       * the XOR of the hashes of the raw key and raw value bytes.
       *
       * @throws IOException on a key type mismatch, an out-of-order key, or a partition
       *     mismatch, and on collector failure
       */
      @SuppressWarnings("unchecked")
      public void map(
          WritableComparable key,
          Writable value,
          OutputCollector<IntWritable, RecordStatsWritable> output,
          Reporter reporter)
          throws IOException {
        // Lazily build rawKey/rawValue the first time 'map' runs.
        final boolean firstCall = (recordId == -1);
        if (firstCall) {
          rawKey = createRaw(key.getClass());
          rawValue = createRaw(value.getClass());
        }
        recordId++;

        final boolean fromSortOutput = (this.key == sortOutput);
        if (fromSortOutput) {
          if (prevKey == null) {
            // First sorted record: just remember its class for later sanity checks.
            keyClass = key.getClass();
          } else {
            // Sanity check: all keys must share the class of the first one.
            if (keyClass != key.getClass()) {
              throw new IOException(
                  "Type mismatch in key: expected " + keyClass.getName()
                      + ", recieved " + key.getClass().getName());
            }

            // Ordering check: the previous key must not compare greater than this one.
            // NOTE(review): if the caller reuses one key instance across calls, prevKey
            // aliases key and this comparison can never fail -- confirm.
            if (prevKey.compareTo(key) > 0) {
              throw new IOException(
                  "The 'map-reduce' framework wrongly classifed (" + prevKey + ") > (" + key
                      + ") for record# " + recordId);
            }
          }
          prevKey = key;

          // Partitioning check: the key must belong to the partition being read.
          final int keyPartition = partitioner.getPartition(key, value, noSortReducers);
          if (keyPartition != partition) {
            throw new IOException(
                "Partitions do not match for record# " + recordId
                    + " ! - '" + partition + "' v/s '" + keyPartition + "'");
          }
        }

        // Raw bytes first, then their length, for the key and then the value (order kept).
        final byte[] rawKeyData = rawKey.getRawBytes(key);
        final int rawKeyLen = rawKey.getRawBytesLength(key);
        final byte[] rawValueData = rawValue.getRawBytes(value);
        final int rawValueLen = rawValue.getRawBytesLength(value);

        final int keyHash = WritableComparator.hashBytes(rawKeyData, rawKeyLen);
        final int valueHash = WritableComparator.hashBytes(rawValueData, rawValueLen);
        final int keyValueChecksum = keyHash ^ valueHash;

        // Emit (this.key, record-stats) for a single record.
        final RecordStatsWritable stats =
            new RecordStatsWritable((rawKeyLen + rawValueLen), 1, keyValueChecksum);
        output.collect(this.key, stats);
      }