Example #1
0
  /**
   * Wraps the bucket resolved from {@code bucketStripped} in a two-level map: the outer map holds
   * a single entry under {@code MAPWRITABLE_BUCKET_KEY}, whose value is an inner map from the
   * bucket's hash code to the bucket itself.
   *
   * @param bucketStripped the stripped bucket handed to {@code getBucket}
   * @return the outer map containing the nested bucket map
   * @throws IOException declared by this method; presumably raised by {@code getBucket}
   */
  public MapWritable getBucketAsKey(BucketStripped bucketStripped) throws IOException {
    Bucket resolved = getBucket(bucketStripped);

    // Inner level: hash code -> bucket.
    MapWritable byHash = new MapWritable();
    byHash.put(new IntWritable(resolved.hashCode()), resolved);

    // Outer level: the well-known bucket key -> inner map.
    MapWritable wrapper = new MapWritable();
    wrapper.put(MAPWRITABLE_BUCKET_KEY, byHash);
    return wrapper;
  }
Example #2
0
  /**
   * Copies every entry of {@code bucketCache} into a fresh map and returns that map nested under
   * {@code MAPWRITABLE_BUCKET_KEY} inside a new outer map.
   *
   * @return the outer map holding the copied bucket entries
   */
  public MapWritable toMapWritableKey() {
    MapWritable cachedBuckets = new MapWritable();
    for (IntWritable bucketId : bucketCache.keySet()) {
      cachedBuckets.put(bucketId, bucketCache.get(bucketId));
    }

    MapWritable wrapper = new MapWritable();
    wrapper.put(MAPWRITABLE_BUCKET_KEY, cachedBuckets);
    return wrapper;
  }
  /**
   * Emits, for one input document, a map carrying its vector, its distance, the squared distance
   * and a count of one. The input key is written through unchanged.
   *
   * @param key the input key, reused as the output key
   * @param value the document mapping supplying the distance and the vector
   * @param context the context the output record is written to
   */
  @Override
  protected void map(Text key, DocumentMapping value, Context context)
      throws IOException, InterruptedException {
    final double dist = value.getDistance();
    final Vector docVector = value.getVector();

    // Assemble the per-document record.
    MapWritable stats = new MapWritable();
    stats.put(ClusterEvaluatorMR.DOC_VECTOR_KEY, new VectorWritable(docVector));
    stats.put(ClusterEvaluatorMR.DISTANCE_KEY, new DoubleWritable(dist));
    stats.put(ClusterEvaluatorMR.SQUARED_DISTANCE_KEY, new DoubleWritable(dist * dist));
    stats.put(ClusterEvaluatorMR.COUNT_KEY, new LongWritable(1));

    context.write(key, stats);
  }
Example #4
0
  /**
   * Serializes one row into the reusable {@code cachedWritable} map, keyed by column name.
   *
   * <p>Fields whose field reference or writable value is {@code null} are omitted. Values that are
   * not one of the supported primitive writables are logged at WARN level and skipped.
   *
   * @param obj the row object to serialize
   * @param inspector inspector for {@code obj}; cast to {@code StructObjectInspector}
   * @return the shared {@code cachedWritable} instance, cleared and refilled on every call
   * @throws SerDeException if the number of struct fields differs from the number of columns
   */
  @Override
  public Writable serialize(final Object obj, final ObjectInspector inspector)
      throws SerDeException {

    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
      throw new SerDeException(
          String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
      final StructField structField = fields.get(c);

      // Concatenate the reference itself instead of calling toString() on it: a null field ref
      // must reach the null check below rather than throw while building the log message.
      LOG.debug("fieldId=" + c + ",structField=" + structField);

      if (structField == null) {
        continue;
      }

      final Object field = structInspector.getStructFieldData(obj, structField);
      final AbstractPrimitiveObjectInspector fieldOI =
          (AbstractPrimitiveObjectInspector) structField.getFieldObjectInspector();
      final Writable value = (Writable) fieldOI.getPrimitiveWritableObject(field);

      if (value == null) {
        continue;
      }

      LOG.debug("fieldCount=" + fieldCount + ",value=" + value);
      if (isSupportedWritable(value)) {
        cachedWritable.put(new Text(columnNames.get(c)), value);
      } else {
        LOG.warn("fieldCount=" + fieldCount + ",type=" + value.getClass().getName());
      }
    }

    return cachedWritable;
  }

  /** Returns whether {@code value} is one of the writable types this serializer stores. */
  private boolean isSupportedWritable(final Writable value) {
    return value instanceof IntWritable
        || value instanceof Text
        || value instanceof LongWritable
        || value instanceof DoubleWritable
        || value instanceof FloatWritable
        || value instanceof BooleanWritable
        || value instanceof ByteWritable
        || value instanceof BytesWritable;
  }
Example #5
0
  /**
   * Requests removal of this vertex: sends a message holding this vertex's ID under
   * {@code GraphJobRunner.FLAG_VERTEX_DECREASE} to the master task, then lowers the vertex
   * counter by one.
   *
   * @throws IOException declared by this method; presumably raised while sending the message
   */
  @Override
  public void remove() throws IOException {
    MapWritable removal = new MapWritable();
    removal.put(GraphJobRunner.FLAG_VERTEX_DECREASE, this.vertexID);

    // The removal notice is addressed to the master task's peer.
    String masterPeer = GraphJobRunner.getMasterTask(this.getPeer());
    runner.getPeer().send(masterPeer, new GraphJobMessage(removal));

    alterVertexCounter(-1);
  }
 /**
  * Reads a map from the input: a header giving the entry count, followed by that many key/value
  * pairs, each obtained through {@code read()}.
  *
  * @param mw map to fill; a new one is created when {@code null}
  * @return the map that received the entries
  * @throws IOException declared by this method; presumably propagated from the underlying input
  */
 public MapWritable readMap(MapWritable mw) throws IOException {
   final MapWritable target = (mw != null) ? mw : new MapWritable();
   for (int remaining = in.readMapHeader(); remaining > 0; remaining--) {
     // Each key is read before its value.
     final Writable entryKey = read();
     final Writable entryValue = read();
     target.put(entryKey, entryValue);
   }
   return target;
 }
 public SequenceFileExportMapper() {
   Fields.put(new Text("full_path"), FullPath);
   Fields.put(new Text("extension"), Ext);
   Fields.put(new Text("sha1"), Sha);
   Fields.put(new Text("md5"), Md5);
   Fields.put(new Text("data"), Vid);
   Fields.put(new Text("hdfs_path"), HdfsPath);
 }
Example #8
0
  /**
   * Creates a new vertex from the given ID, edges and value, sends it under
   * {@code GraphJobRunner.FLAG_VERTEX_INCREASE} to the host returned by
   * {@code runner.getHostName(vertexID)}, and raises the vertex counter by one.
   *
   * @param vertexID ID assigned to the new vertex
   * @param edges edges assigned to the new vertex
   * @param value value assigned to the new vertex
   * @throws IOException declared by this method; presumably raised while sending the message
   */
  @Override
  public void addVertex(V vertexID, List<Edge<V, E>> edges, M value) throws IOException {
    // Build and populate the vertex to be added.
    Vertex<V, E, M> added = GraphJobRunner.<V, E, M>newVertexInstance(GraphJobRunner.VERTEX_CLASS);
    added.setEdges(edges);
    added.setValue(value);
    added.setVertexID(vertexID);

    // Wrap it in a message flagged as a vertex increase and ship it.
    MapWritable addition = new MapWritable();
    addition.put(GraphJobRunner.FLAG_VERTEX_INCREASE, added);
    runner.getPeer().send(runner.getHostName(vertexID), new GraphJobMessage(addition));

    alterVertexCounter(1);
  }
 /**
  * Advances the cursor by one record and copies the configured columns into
  * {@code valueHolder} as text, using {@code NullWritable} for absent values.
  *
  * @param keyHolder receives the current position counter
  * @param valueHolder receives one entry per configured column
  * @return {@code false} when the cursor has no further record, {@code true} otherwise
  */
 @Override
 public boolean next(LongWritable keyHolder, MapWritable valueHolder) throws IOException {
   if (!cursor.hasNext()) {
     return false;
   }

   final DBObject row = cursor.next();
   keyHolder.set(pos);

   for (String column : this.readColumns) {
     // The column named "id" is looked up under "_id" in the record.
     final String recordField = "id".equals(column) ? "_id" : column;
     final Object raw = row.get(recordField);

     final Writable cell;
     if (raw == null) {
       cell = NullWritable.get();
     } else {
       cell = new Text(raw.toString());
     }
     valueHolder.put(new Text(column), cell);
   }

   pos++;
   return true;
 }
Example #10
0
  /**
   * Fetches the next document from the cursor and copies the configured columns into
   * {@code valueHolder} as text, using {@code NullWritable} for absent values.
   *
   * @param keyHolder receives the position counter, which is then advanced by one
   * @param valueHolder receives one entry per configured column
   * @return {@code false} when the cursor yields no document, {@code true} otherwise
   */
  @Override
  public boolean next(LongWritable keyHolder, MapWritable valueHolder) throws IOException {
    final SolrDocument current = cursor.nextDocument();
    if (current == null) {
      return false;
    }

    // Hand out the current position and advance it in the same step.
    final long rowIndex = pos++;
    keyHolder.set(rowIndex);

    for (String column : readColumns) {
      final Object raw = current.getFieldValue(column);

      final Writable cell;
      if (raw == null) {
        cell = NullWritable.get();
      } else {
        cell = new Text(raw.toString());
      }
      valueHolder.put(new Text(column), cell);
    }
    return true;
  }
Example #11
0
  /**
   * Reads the next row of the result set into {@code value}, one entry per column.
   *
   * <p>Each database column name is lowercased and translated through the input column mapping of
   * {@code databaseProperties}; the mapped name, lowercased, becomes the entry key.
   *
   * @param key receives the current position plus the start offset of the split
   * @param value receives the row's columns keyed by mapped column name
   * @return {@code false} when the result set is exhausted, {@code true} otherwise
   * @throws IOException if a database column has no mapping, or wrapping any {@link SQLException}
   *     raised while reading (the original exception is kept as the cause)
   */
  public boolean next(LongWritable key, MapWritable value) throws IOException {
    try {
      if (!results.next()) {
        return false;
      }

      // Set the key field value as the output key value
      key.set(pos + split.getStart());

      ResultSetMetaData resultsMetaData = results.getMetaData();
      int columnCount = resultsMetaData.getColumnCount();

      List<String> names = new ArrayList<String>(columnCount);
      List<Integer> types = new ArrayList<Integer>(columnCount);
      // The column count starts from 1
      for (int i = 1; i <= columnCount; i++) {
        // This is the column name in db table
        String dbColumn = resultsMetaData.getColumnName(i).toLowerCase();
        // Get the relevant metaTable name
        String mapped = databaseProperties.getInputColumnMappingFields().get(dbColumn);
        if (mapped == null) {
          // Report the unmapped column by name instead of failing with a bare
          // NullPointerException on the toLowerCase() call below.
          throw new IOException(
              "No input column mapping found for database column '" + dbColumn + "'");
        }
        // Hive keeps column names in lowercase
        names.add(mapped.toLowerCase());
        types.add(resultsMetaData.getColumnType(i));
      }

      for (int j = 0; j < types.size(); j++) {
        value.put(new Text(names.get(j)), getActualObjectTypeForValue(results, types, j));
      }

      pos++;
    } catch (SQLException e) {
      // Keep the SQLException as the cause so its stack trace and SQL state are not lost.
      throw new IOException(e.getMessage(), e);
    }
    return true;
  }
Example #12
0
  /**
   * Runs the GATE application over one Behemoth document and copies the resulting annotations
   * and the filtered document features back onto it.
   *
   * <p>Any exception raised while processing is logged and, when a reporter is present, counted;
   * the input document is returned either way.
   *
   * @param inputDoc the document to process; enriched in place
   * @param reporter receives status and counter updates; may be {@code null}
   * @return a single-element array holding {@code inputDoc}
   */
  public synchronized BehemothDocument[] process(BehemothDocument inputDoc, Reporter reporter) {
    final boolean hasReporter = reporter != null;
    if (hasReporter) {
      reporter.setStatus("GATE : " + inputDoc.getUrl().toString());
    }

    final boolean dropExisting = config.getBoolean("gate.deleteBehemothAnnotations", false);

    gate.Document gateDoc = null;
    try {
      // Build the GATE document, place it in the corpus and run the application over it.
      gateDoc = generateGATEDoc(inputDoc);
      corpus.add(gateDoc);
      this.GATEapplication.setCorpus(corpus);
      this.GATEapplication.execute();

      // An empty set name selects the document's default annotation set.
      AnnotationSet produced =
          "".equals(filters.getAnnotationSetName())
              ? gateDoc.getAnnotations()
              : gateDoc.getAnnotations(filters.getAnnotationSetName());

      // Convert the GATE annotations to Behemoth ones through the filters, then sort them.
      List<com.digitalpebble.behemoth.Annotation> converted =
          convertGATEAnnotationsToBehemoth(produced, inputDoc);
      Collections.sort(converted);

      if (dropExisting) {
        inputDoc.getAnnotations().clear();
      }
      inputDoc.getAnnotations().addAll(converted);

      // One counter increment per added annotation, grouped by annotation type.
      if (hasReporter) {
        for (com.digitalpebble.behemoth.Annotation added : converted) {
          reporter.incrCounter("GATE", added.getType(), 1);
        }
      }

      // Copy the selected GATE document features into the Behemoth metadata.
      Set<String> wantedFeatures = this.filters.getDocFeaturesFilter();
      MapWritable metadata = inputDoc.getMetadata(true);
      for (String featureName : wantedFeatures) {
        Object featureValue = gateDoc.getFeatures().get(featureName);
        if (featureValue == null) {
          continue;
        }
        metadata.put(new Text(featureName), new Text(featureValue.toString()));
      }

      if (hasReporter) {
        reporter.incrCounter("GATE", "Document", 1);
      }

    } catch (Exception e) {
      LOG.error(inputDoc.getUrl().toString(), e);
      if (hasReporter) {
        reporter.incrCounter("GATE", "Exceptions", 1);
      }
    } finally {
      // Empty the corpus and release the GATE document.
      corpus.clear();
      if (gateDoc != null) {
        Factory.deleteResource(gateDoc);
      }
    }

    // Only the input document is returned for now.
    return new BehemothDocument[] {inputDoc};
  }