public MapWritable getBucketAsKey(BucketStripped bucketStripped) throws IOException {
    MapWritable mw = new MapWritable();
    MapWritable bucketMap = new MapWritable();
    Bucket bucket = getBucket(bucketStripped);
    bucketMap.put(new IntWritable(bucket.hashCode()), bucket);
    mw.put(MAPWRITABLE_BUCKET_KEY, bucketMap);
    return mw;
}
public MapWritable toMapWritableKey() {
    MapWritable mw = new MapWritable();
    MapWritable bucketMap = new MapWritable();
    for (IntWritable key : bucketCache.keySet()) {
        bucketMap.put(key, bucketCache.get(key));
    }
    mw.put(MAPWRITABLE_BUCKET_KEY, bucketMap);
    return mw;
}
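For context, a minimal sketch of how a caller might unpack the nested bucket map that the two helpers above produce. This is not part of the original class: readBuckets is a hypothetical helper name, and MAPWRITABLE_BUCKET_KEY and Bucket are assumed to come from the surrounding class.

// Hypothetical consumer sketch: unpack the nested bucket map stored under
// MAPWRITABLE_BUCKET_KEY by the helpers above.
public static Map<IntWritable, Bucket> readBuckets(MapWritable mw) {
    Map<IntWritable, Bucket> buckets = new HashMap<IntWritable, Bucket>();
    MapWritable bucketMap = (MapWritable) mw.get(MAPWRITABLE_BUCKET_KEY);
    if (bucketMap == null) {
        return buckets; // nothing was serialized under the bucket key
    }
    // MapWritable implements Map<Writable, Writable>, so it can be iterated directly.
    for (Map.Entry<Writable, Writable> entry : bucketMap.entrySet()) {
        buckets.put((IntWritable) entry.getKey(), (Bucket) entry.getValue());
    }
    return buckets;
}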
@Override
protected void map(Text key, DocumentMapping value, Context context)
        throws IOException, InterruptedException {
    double distance = value.getDistance();
    Vector vector = value.getVector();

    // Emit the per-document statistics as a MapWritable keyed by cluster.
    MapWritable m = new MapWritable();
    m.put(ClusterEvaluatorMR.DOC_VECTOR_KEY, new VectorWritable(vector));
    m.put(ClusterEvaluatorMR.DISTANCE_KEY, new DoubleWritable(distance));
    m.put(ClusterEvaluatorMR.SQUARED_DISTANCE_KEY, new DoubleWritable(distance * distance));
    m.put(ClusterEvaluatorMR.COUNT_KEY, new LongWritable(1));
    context.write(key, m);
}
@Override
public Writable serialize(final Object obj, final ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
        throw new SerDeException(String.format("Required %d columns, received %d.",
            columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
        StructField structField = fields.get(c);
        if (structField == null) {
            continue;
        }
        LOG.debug("fieldId=" + c + ",structField=" + structField.toString());

        final Object field = structInspector.getStructFieldData(obj, structField);
        final AbstractPrimitiveObjectInspector fieldOI =
            (AbstractPrimitiveObjectInspector) structField.getFieldObjectInspector();
        Writable value = (Writable) fieldOI.getPrimitiveWritableObject(field);
        if (value == null) {
            continue;
        }
        LOG.debug("fieldCount=" + fieldCount + ",value=" + value.toString());

        // Only primitive Writable types are stored; anything else is logged and skipped.
        if (value instanceof IntWritable || value instanceof Text
                || value instanceof LongWritable || value instanceof DoubleWritable
                || value instanceof FloatWritable || value instanceof BooleanWritable
                || value instanceof ByteWritable || value instanceof BytesWritable) {
            cachedWritable.put(new Text(columnNames.get(c)), value);
        } else {
            LOG.warn("fieldCount=" + fieldCount + ",type=" + value.getClass().getName());
        }
    }
    return cachedWritable;
}
@Override
public void remove() throws IOException {
    MapWritable msg = new MapWritable();
    msg.put(GraphJobRunner.FLAG_VERTEX_DECREASE, this.vertexID);

    // Get master task peer.
    String destPeer = GraphJobRunner.getMasterTask(this.getPeer());
    runner.getPeer().send(destPeer, new GraphJobMessage(msg));
    alterVertexCounter(-1);
}
public MapWritable readMap(MapWritable mw) throws IOException {
    if (mw == null) {
        mw = new MapWritable();
    }
    int length = in.readMapHeader();
    for (int i = 0; i < length; i++) {
        Writable key = read();
        Writable value = read();
        mw.put(key, value);
    }
    return mw;
}
public SequenceFileExportMapper() {
    Fields.put(new Text("full_path"), FullPath);
    Fields.put(new Text("extension"), Ext);
    Fields.put(new Text("sha1"), Sha);
    Fields.put(new Text("md5"), Md5);
    Fields.put(new Text("data"), Vid);
    Fields.put(new Text("hdfs_path"), HdfsPath);
}
@Override
public void addVertex(V vertexID, List<Edge<V, E>> edges, M value) throws IOException {
    MapWritable msg = new MapWritable();

    // Create the new vertex.
    Vertex<V, E, M> vertex = GraphJobRunner.<V, E, M>newVertexInstance(GraphJobRunner.VERTEX_CLASS);
    vertex.setEdges(edges);
    vertex.setValue(value);
    vertex.setVertexID(vertexID);

    msg.put(GraphJobRunner.FLAG_VERTEX_INCREASE, vertex);
    runner.getPeer().send(runner.getHostName(vertexID), new GraphJobMessage(msg));
    alterVertexCounter(1);
}
@Override
public boolean next(LongWritable keyHolder, MapWritable valueHolder) throws IOException {
    if (!cursor.hasNext()) {
        return false;
    }
    DBObject record = cursor.next();
    keyHolder.set(pos);
    for (int i = 0; i < this.readColumns.length; i++) {
        String key = readColumns[i];
        // MongoDB stores the document id under "_id"; expose it as the "id" column.
        Object vObj = ("id".equals(key)) ? record.get("_id") : record.get(key);
        Writable value = (vObj == null) ? NullWritable.get() : new Text(vObj.toString());
        valueHolder.put(new Text(key), value);
    }
    pos++;
    return true;
}
@Override
public boolean next(LongWritable keyHolder, MapWritable valueHolder) throws IOException {
    SolrDocument doc = cursor.nextDocument();
    if (doc == null) {
        return false;
    }
    keyHolder.set(pos++);
    for (int i = 0; i < readColumns.length; i++) {
        String key = readColumns[i];
        Object vObj = doc.getFieldValue(key);
        Writable value = (vObj == null) ? NullWritable.get() : new Text(vObj.toString());
        valueHolder.put(new Text(key), value);
    }
    return true;
}
public boolean next(LongWritable key, MapWritable value) throws IOException {
    try {
        if (!results.next()) {
            return false;
        }

        // Set the key field value as the output key value.
        key.set(pos + split.getStart());

        ResultSetMetaData resultsMetaData = results.getMetaData();
        int columnCount = resultsMetaData.getColumnCount();

        List<String> names = new ArrayList<String>();
        List<Integer> types = new ArrayList<Integer>();

        // JDBC column indexes start from 1.
        for (int i = 1; i <= columnCount; i++) {
            // This is the column name in the database table.
            String name = resultsMetaData.getColumnName(i).toLowerCase();
            // Map it to the corresponding meta-table column name.
            name = databaseProperties.getInputColumnMappingFields().get(name);
            int type = resultsMetaData.getColumnType(i);
            // Hive keeps column names in lowercase.
            names.add(name.toLowerCase());
            types.add(type);
        }

        for (int j = 0; j < types.size(); j++) {
            value.put(new Text(names.get(j)), getActualObjectTypeForValue(results, types, j));
        }
        pos++;
    } catch (SQLException e) {
        throw new IOException(e.getMessage());
    }
    return true;
}
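As a rough usage sketch (not taken from any of the sources above), this is how the (LongWritable, MapWritable) pairs produced by the three next() implementations might be drained by a caller; reader stands in for any open RecordReader of that type.

// Hypothetical caller sketch: iterate a RecordReader<LongWritable, MapWritable>
// such as the MongoDB, Solr, or JDBC readers above and print each row.
LongWritable rowKey = new LongWritable();
MapWritable row = new MapWritable();
while (reader.next(rowKey, row)) {
    for (Map.Entry<Writable, Writable> column : row.entrySet()) {
        String name = column.getKey().toString();   // column names are stored as Text
        Writable cell = column.getValue();          // NullWritable marks a missing value
        System.out.println(rowKey.get() + "\t" + name + "="
            + (cell instanceof NullWritable ? "NULL" : cell.toString()));
    }
    row.clear(); // the value holder is reused, so clear it between rows
}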
// Process an input document with GATE and a Reporter
public synchronized BehemothDocument[] process(BehemothDocument inputDoc, Reporter reporter) {
    if (reporter != null)
        reporter.setStatus("GATE : " + inputDoc.getUrl().toString());

    boolean clearBehemothAnnotations = config.getBoolean("gate.deleteBehemothAnnotations", false);

    // process the text passed as value with the application
    // a) create a GATE document based on the text value
    gate.Document gatedocument = null;
    try {
        gatedocument = generateGATEDoc(inputDoc);
        // add it to the current corpus
        corpus.add(gatedocument);
        // get the application and assign the corpus to it
        this.GATEapplication.setCorpus(corpus);
        // process it with GATE
        this.GATEapplication.execute();

        AnnotationSet annots = null;
        if ("".equals(filters.getAnnotationSetName()))
            annots = gatedocument.getAnnotations();
        else
            annots = gatedocument.getAnnotations(filters.getAnnotationSetName());

        // enrich the input doc with the annotations from the GATE application:
        // transfer the annotations from the GATE document to the Behemoth one
        // using the filters
        List<com.digitalpebble.behemoth.Annotation> beheannotations =
            convertGATEAnnotationsToBehemoth(annots, inputDoc);

        // sort the annotations before adding them?
        Collections.sort(beheannotations);

        // clear the existing behemoth annotations
        if (clearBehemothAnnotations) {
            inputDoc.getAnnotations().clear();
        }
        inputDoc.getAnnotations().addAll(beheannotations);

        // add counters about the number of annotations added
        if (reporter != null) {
            for (com.digitalpebble.behemoth.Annotation annot : beheannotations) {
                reporter.incrCounter("GATE", annot.getType(), 1);
            }
        }

        // Add the document features from GATE to Behemoth
        Set<String> docFeatFilter = this.filters.getDocFeaturesFilter();
        MapWritable beheMD = inputDoc.getMetadata(true);
        if (docFeatFilter.size() > 0) {
            for (String docFeatName : docFeatFilter) {
                Object featValue = gatedocument.getFeatures().get(docFeatName);
                if (featValue != null) {
                    beheMD.put(new Text(docFeatName), new Text(featValue.toString()));
                }
            }
        }

        if (reporter != null)
            reporter.incrCounter("GATE", "Document", 1);
    } catch (Exception e) {
        LOG.error(inputDoc.getUrl().toString(), e);
        if (reporter != null)
            reporter.incrCounter("GATE", "Exceptions", 1);
    } finally {
        // remove the document from the corpus again
        corpus.clear();
        // and from memory
        if (gatedocument != null)
            Factory.deleteResource(gatedocument);
    }
    // currently returns only the input document
    return new BehemothDocument[] { inputDoc };
}
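Finally, a small sketch of how a downstream consumer might read back one of the document features that process() copies into the Behemoth metadata MapWritable. outputDoc stands for a BehemothDocument returned by process(), and the "language" feature name is purely illustrative.

// Hypothetical downstream sketch: read a GATE document feature that process()
// copied into the Behemoth metadata. The feature name below is illustrative only.
MapWritable metadata = outputDoc.getMetadata(true);
Writable language = metadata.get(new Text("language"));
if (language != null) {
    System.out.println("language = " + language.toString());
}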