@SuppressWarnings("unchecked") @Override public void map(LongWritable positionToMap, LongWritable record, Context context) { try { S pos = game.hashToState(positionToMap.get()); game.longToRecord(pos, record.get(), rec); rec.previousPosition(); int numParents = 0; numParents = ((Undoable<S>) game).possibleParents(pos, parentStates); for (int i = 0; i < numParents; i++) { long parentHash = game.stateToHash(parentStates[i]); RangeFile parentFile = rangeFiles.getFile(parentHash); srp.state = parentHash; srp.record = game.recordToLong(parentStates[i], rec); context.write(parentFile, new StateRecordPair(srp.state, srp.record)); } } catch (IOException e) { throw new Error(e); } catch (InterruptedException e) { e.printStackTrace(); } }
/**
 * Map method.
 *
 * @param offset samples starting from the (offset+1)th sample.
 * @param size the number of samples for this map
 * @param out output {true->numInside, false->numOutside}
 * @param reporter progress reporter
 */
public void map(
    LongWritable offset,
    LongWritable size,
    OutputCollector<BooleanWritable, LongWritable> out,
    Reporter reporter)
    throws IOException {
  final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
  long numInside = 0L;
  long numOutside = 0L;

  for (long i = 0; i < size.get(); ) {
    // generate points in a unit square
    final double[] point = haltonsequence.nextPoint();

    // count points inside/outside of the inscribed circle of the square
    final double x = point[0] - 0.5;
    final double y = point[1] - 0.5;
    if (x * x + y * y > 0.25) {
      numOutside++;
    } else {
      numInside++;
    }

    // report status
    i++;
    if (i % 1000 == 0) {
      reporter.setStatus("Generated " + i + " samples.");
    }
  }

  // output map results
  out.collect(new BooleanWritable(true), new LongWritable(numInside));
  out.collect(new BooleanWritable(false), new LongWritable(numOutside));
}
/**
 * The result file contains a hierarchy of workerID -> result variables (incl. filename). We
 * deduplicate on the workerID. Without JVM reuse, each task refers to a unique workerID, so we
 * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID, and
 * there are duplicate filenames due to partial aggregation and overwrite of fname (the
 * RemoteParWorkerMapper ensures uniqueness of those files independent of the runtime
 * implementation).
 *
 * @param job job configuration
 * @param fname result file name
 * @return array of local variable maps, one per worker
 * @throws DMLRuntimeException if parsing a result variable fails
 * @throws IOException if reading the sequence files fails
 */
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname)
    throws DMLRuntimeException, IOException {
  HashMap<Long, LocalVariableMap> tmp = new HashMap<Long, LocalVariableMap>();

  FileSystem fs = FileSystem.get(job);
  Path path = new Path(fname);
  LongWritable key = new LongWritable(); // workerID
  Text value = new Text(); // serialized var header (incl filename)

  int countAll = 0;
  for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job), lpath, job);
    try {
      while (reader.next(key, value)) {
        // System.out.println("key="+key.get()+", value="+value.toString());
        if (!tmp.containsKey(key.get())) tmp.put(key.get(), new LocalVariableMap());
        Object[] dat = ProgramConverter.parseDataObject(value.toString());
        tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
        countAll++;
      }
    } finally {
      if (reader != null) reader.close();
    }
  }

  LOG.debug("Num remote worker results (before deduplication): " + countAll);
  LOG.debug("Num remote worker results: " + tmp.size());

  // create return array
  return tmp.values().toArray(new LocalVariableMap[0]);
}
public void reduce(IntWritable key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long max = 0;
  for (LongWritable val : values) {
    if (val.get() > max) max = val.get();
  }
  context.write(key, new LongWritable(max));
}
private void compareToUDFUnixTimeStampDate(long t, long y) {
  TimestampWritable tsw = toTimestampWritable(t);
  LongWritable res = getLongWritable(tsw);
  if (res.get() != y) {
    System.out.printf(
        "%d vs %d for %d, %d\n", res.get(), y, t, tsw.getTimestamp().getTime() / 1000);
  }
  Assert.assertEquals(res.get(), y);
}
@Override
public LongWritable evaluate(LongWritable a, LongWritable b) {
  // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" + b);
  if ((a == null) || (b == null)) {
    return null;
  }
  longWritable.set(a.get() + b.get());
  return longWritable;
}
public void reduce(Text key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long max = Long.MIN_VALUE;
  for (LongWritable id : values) {
    if (id.get() > max) {
      max = id.get();
    }
  }
  context.write(key, new LongWritable(max));
}
/** Compares in the descending order of the keys. */
@Override
public int compare(WritableComparable a, WritableComparable b) {
  LongWritable o1 = (LongWritable) a;
  LongWritable o2 = (LongWritable) b;
  if (o1.get() < o2.get()) {
    return 1;
  } else if (o1.get() > o2.get()) {
    return -1;
  } else {
    return 0;
  }
}
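A comparator like this is typically registered as a job's sort comparator so that keys reach the reducer in descending order. The following is a minimal, hypothetical driver sketch, not taken from the source: the class name DescendingLongComparator stands in for the (unnamed) class containing the compare() method above, and it is assumed to extend WritableComparator with a no-arg constructor, as setSortComparatorClass requires a RawComparator.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class DescendingSortDriver {

  // Wire the reversed comparator into a job so that reduce() sees the largest keys first.
  public static Job createJob(Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "descending-sort");
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setSortComparatorClass(DescendingLongComparator.class); // comparator class name assumed
    return job;
  }
}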
/**
 * Accumulate number of points inside/outside results from the mappers.
 *
 * @param isInside Are the points inside?
 * @param values An iterator to a list of point counts
 * @param context dummy, not used here.
 */
public void reduce(BooleanWritable isInside, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  if (isInside.get()) {
    for (LongWritable val : values) {
      numInside += val.get();
    }
  } else {
    for (LongWritable val : values) {
      numOutside += val.get();
    }
  }
}
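After this reducer runs, numInside and numOutside hold the totals across all mappers. The fraction numInside / (numInside + numOutside) approximates the area of the inscribed circle relative to the unit square, i.e. pi/4, so scaling by 4 yields the estimate. A minimal, hypothetical sketch of that final step (class name, method name, and the 20-digit precision are assumptions, not from the source):

import java.math.BigDecimal;
import java.math.RoundingMode;

public final class PiEstimate {

  // numInside / (numInside + numOutside) ~= pi / 4, so multiply the ratio by 4.
  public static BigDecimal of(long numInside, long numOutside) {
    return BigDecimal.valueOf(numInside)
        .multiply(BigDecimal.valueOf(4))
        .divide(BigDecimal.valueOf(numInside + numOutside), 20, RoundingMode.HALF_UP);
  }
}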
public void testInputFormat() {
  try {
    JobConf conf = new JobConf();
    String TMP_DIR = System.getProperty("test.build.data", "/tmp");
    Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
    SequenceFile.Writer sfw =
        SequenceFile.createWriter(
            FileSystem.getLocal(conf),
            conf,
            filename,
            ChukwaArchiveKey.class,
            ChunkImpl.class,
            SequenceFile.CompressionType.NONE,
            Reporter.NULL);

    StringBuilder buf = new StringBuilder();
    int offsets[] = new int[lines.length];
    for (int i = 0; i < lines.length; ++i) {
      buf.append(lines[i]);
      buf.append("\n");
      offsets[i] = buf.length() - 1;
    }
    ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
    ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
    val.setRecordOffsets(offsets);
    sfw.append(key, val);
    sfw.append(key, val); // write it twice
    sfw.close();

    long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
    InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
    ChukwaInputFormat in = new ChukwaInputFormat();
    RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

    LongWritable l = r.createKey();
    Text line = r.createValue();
    for (int i = 0; i < lines.length * 2; ++i) {
      boolean succeeded = r.next(l, line);
      assertTrue(succeeded);
      assertEquals(i, l.get());
      assertEquals(lines[i % lines.length], line.toString());
      System.out.println("read line: " + l.get() + " " + line);
    }
    boolean succeeded = r.next(l, line);
    assertFalse(succeeded);
  } catch (IOException e) {
    e.printStackTrace();
    fail("IO exception " + e);
  }
}
/**
 * Produce splits such that each is no greater than the quotient of the total size and the number
 * of splits requested.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException(
        "Invalid metadata: #files("
            + cnfiles
            + ") total_size("
            + cbsize
            + ") listuri("
            + srcfilelist
            + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  SequenceFile.Reader sl = null;
  try {
    sl = new SequenceFile.Reader(fs, src, job);
    for (; sl.next(key, value); last = sl.getPosition()) {
      // if adding this split would put this split past the target size,
      // cut the last split and put this next file in the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  } finally {
    checkAndClose(sl);
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
  }
  return splits.toArray(new FileSplit[splits.size()]);
}
public void reduce(
    LongWritable userid,
    Iterator<LongWritable> tstamps,
    OutputCollector<Text, Session> output,
    Reporter reporter)
    throws IOException {
  // Copy the iterator to an array.
  ArrayList<LongWritable> tsarray = new ArrayList<LongWritable>();
  while (tstamps.hasNext()) tsarray.add(new LongWritable(tstamps.next().get()));

  // Sort the timestamps.
  Collections.sort(tsarray);

  // Scan the array looking for session boundaries.
  long t0 = 0;
  long session_start = 0;
  long session_end = 0;
  int session_hits = 0;
  for (LongWritable tstamp : tsarray) {
    long tt = tstamp.get();

    // How long since the prior hit?
    long delta = tt - t0;

    // Is this a new session?
    if (delta > SESSION_GAP_MSEC) {
      // Is there a prior session?
      if (session_start != 0)
        collect_session(userid.get(), session_start, session_end, session_hits, output);

      // Reset for the new session.
      session_start = tt;
      session_hits = 0;
    }

    // Extend the current session.
    session_hits += 1;
    session_end = tt;

    // On to the next hit ...
    t0 = tt;
  }

  // Write out the last session.
  if (session_start != 0)
    collect_session(userid.get(), session_start, session_end, session_hits, output);
}
private void getOffsets(LogFilePath logFilePath, Set<Long> offsets) throws Exception {
  String path = logFilePath.getLogFilePath();
  Path fsPath = new Path(path);
  FileSystem fileSystem = FileUtil.getFileSystem(path);
  SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
  LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
  BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
  while (reader.next(key, value)) {
    if (!offsets.add(key.get())) {
      throw new RuntimeException(
          "duplicate key " + key.get() + " found in file " + logFilePath.getLogFilePath());
    }
  }
  reader.close();
}
@Override
public void map(
    LongWritable rawKey,
    Text rawValue,
    OutputCollector<TaggedFirstSecondIndexes, CSVReblockMR.BlockRow> out,
    Reporter reporter)
    throws IOException {
  if (_first) {
    rowOffset = offsetMap.get(rawKey.get());
    _reporter = reporter;
    _first = false;
  }

  // output the header line
  if (rawKey.get() == 0 && _partFileWithHeader) {
    tfmapper.processHeaderLine();
    if (tfmapper.hasHeader()) return;
  }

  // parse the input line and apply transformation
  String[] words = tfmapper.getWords(rawValue);

  if (!tfmapper.omit(words)) {
    words = tfmapper.apply(words);
    try {
      tfmapper.check(words);

      // Perform CSV Reblock
      CSVReblockInstruction ins = csv_reblock_instructions.get(0).get(0);
      idxRow =
          CSVReblockMapper.processRow(
              idxRow,
              words,
              rowOffset,
              num,
              ins.output,
              ins.brlen,
              ins.bclen,
              ins.fill,
              ins.fillValue,
              out);
    } catch (DMLRuntimeException e) {
      throw new RuntimeException(e.getMessage() + ":" + rawValue.toString());
    }
    num++;
  }
}
public static double getAsConstDouble(@Nonnull final ObjectInspector numberOI)
    throws UDFArgumentException {
  final String typeName = numberOI.getTypeName();
  if (DOUBLE_TYPE_NAME.equals(typeName)) {
    DoubleWritable v = getConstValue(numberOI);
    return v.get();
  } else if (FLOAT_TYPE_NAME.equals(typeName)) {
    FloatWritable v = getConstValue(numberOI);
    return v.get();
  } else if (INT_TYPE_NAME.equals(typeName)) {
    IntWritable v = getConstValue(numberOI);
    return v.get();
  } else if (BIGINT_TYPE_NAME.equals(typeName)) {
    LongWritable v = getConstValue(numberOI);
    return v.get();
  } else if (SMALLINT_TYPE_NAME.equals(typeName)) {
    ShortWritable v = getConstValue(numberOI);
    return v.get();
  } else if (TINYINT_TYPE_NAME.equals(typeName)) {
    ByteWritable v = getConstValue(numberOI);
    return v.get();
  }
  throw new UDFArgumentException(
      "Unexpected argument type to cast as double: "
          + TypeInfoUtils.getTypeInfoFromObjectInspector(numberOI));
}
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  /*
   * Added by Xudong Zhang
   */
  if (parseWeibo) {
    String word = key.toString();
    if (!ChineseUtils.allChinese(word)) {
      context.getCounter("MyCounter", "NumWordsFilteredByWeiboParser").increment(1);
      return;
    }
  }
  long sum = 0;
  for (LongWritable value : values) {
    sum += value.get();
  }
  if (sum >= minSupport) {
    context.write(key, new LongWritable(sum));
  } else {
    context.getCounter("MyCounter", "NumWordsLessThanMinSupport").increment(1);
  }
}
public void process(Writable value) throws HiveException {
  // A mapper can span multiple files/partitions.
  // The serializers need to be reset if the input file changed.
  ExecMapperContext context = getExecContext();
  if (context != null && context.inputFileChanged()) {
    // The child operators clean up if the input file has changed.
    cleanUpInputFileChanged();
  }
  int childrenDone = 0;
  for (MapOpCtx current : currentCtxs) {
    Object row = null;
    try {
      row = current.readRow(value, context);
      if (!current.forward(row)) {
        childrenDone++;
      }
    } catch (Exception e) {
      // TODO: policy on deserialization errors
      String message = toErrorMessage(value, row, current.rowObjectInspector);
      if (row == null) {
        deserialize_error_count.set(deserialize_error_count.get() + 1);
        throw new HiveException("Hive Runtime Error while processing writable " + message, e);
      }
      throw new HiveException("Hive Runtime Error while processing row " + message, e);
    }
  }
  rowsForwarded(childrenDone, 1);
}
public void reduce(LongWritable key, Iterable<Text> values, Context context)
    throws IOException, InterruptedException {
  double sumOfRankValues = 0.0;
  String targetUrlsList = "";

  int sourceUrl = (int) key.get();
  int numUrls = context.getConfiguration().getInt("numUrls", 1);

  // hint: each incoming tuple is either a rank value tuple or a link relation tuple
  for (Text value : values) {
    String[] strArray = value.toString().split("#");
    // sourceUrl = Integer.parseInt(strArray[0].split("\t")[1]);
    if (strArray.length == 1) {
      sumOfRankValues += Double.parseDouble(strArray[0]);
    } else {
      StringBuffer sb = new StringBuffer();
      for (int i = 1; i < strArray.length; i++) {
        sb.append("#" + strArray[i]);
      }
      targetUrlsList = sb.toString();
    }
  }
  sumOfRankValues = 0.85 * sumOfRankValues + 0.15 * (1.0) / (double) numUrls;
  context.write(key, new Text(sumOfRankValues + targetUrlsList));
}
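The final assignment in this reducer is the standard damped PageRank update with damping factor d = 0.85: PR(u) = (1 - d)/N + d * sum over v in B(u) of PR(v)/L(v), where B(u) is the set of pages linking to u, L(v) is the out-degree of v, and N is numUrls. The formula is background, not taken from the source; it assumes the per-link division PR(v)/L(v) happens on the map side, which is consistent with the reducer simply summing the parsed rank values it receives.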
/**
 * Writes this object's related users to the given stream.
 *
 * @param out output stream
 * @throws IOException if writing to the stream fails
 */
@Override
public void write(final DataOutput out) throws IOException {
  out.writeInt(this.relatedUsers.size());
  for (LongWritable item : this.relatedUsers) {
    out.writeLong(item.get());
  }
}
@Override
public void reduce(Text key, Iterable<LongWritable> values, Context context)
    throws IOException, InterruptedException {
  long s = 0;
  for (LongWritable lw : values) s += lw.get();
  context.write(key, new LongWritable(s));
}
@Override
public void map(LongWritable row, NullWritable ignored, Context context)
    throws IOException, InterruptedException {
  context.setStatus("Entering");
  long rowId = row.get();
  if (rand == null) {
    // we use 3 random numbers per row
    rand = new RandomGenerator(rowId * 3);
  }
  addKey();
  value.clear();
  // addRowId(rowId);
  addFiller(rowId);

  // New
  Mutation m = new Mutation(key);
  m.put(
      new Text("c"), // column family
      getRowIdString(rowId), // column qual
      new Value(value.toString().getBytes())); // data

  context.setStatus("About to add to accumulo");
  context.write(tableName, m);
  context.setStatus("Added to accumulo " + key.toString());
}
public void reduce(
    LongWritable key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  // String line = "";
  StringBuilder lineBuilder = new StringBuilder();
  String tempStr = null;
  boolean first = true;
  if (key.get() != -1) {
    while (values.hasNext()) {
      if (first) {
        tempStr = values.next().toString().trim();
        lineBuilder.append(tempStr);
        first = false;
        if (tempStr.equals("-1")) {
          lineBuilder = new StringBuilder("");
        }
      } else {
        lineBuilder.append(" ");
        lineBuilder.append(values.next().toString());
      }
    }
    output.collect(null, new Text(lineBuilder.toString()));
  }
}
/**
 * Convert from long to an integer. This is called for CAST(... AS INT).
 *
 * @param i The long value to convert
 * @return IntWritable
 */
public IntWritable evaluate(LongWritable i) {
  if (i == null) {
    return null;
  } else {
    intWritable.set((int) i.get());
    return intWritable;
  }
}
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  id += increment;
  context.write(
      new LongWritable(id), new Text(String.format("%d, %s", key.get(), value.toString())));
}
public static long getConstLong(@Nonnull final ObjectInspector oi) throws UDFArgumentException {
  if (!isBigIntOI(oi)) {
    throw new UDFArgumentException(
        "argument must be a BigInt value: " + TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
  }
  LongWritable v = getConstValue(oi);
  return v.get();
}
@Override
protected void reduce(Text token, Iterable<LongWritable> counts, Context context)
    throws IOException, InterruptedException {
  long n = 0;
  for (LongWritable count : counts) n += count.get();
  total.set(n);
  context.write(token, total);
}
/** Reads the next key-value pair. */
public boolean next(LongWritable key, TrecDocument value) throws IOException {
  if (!reader.next(pos, text)) {
    return false;
  }
  key.set(pos.get());
  TrecDocument.readDocument(value, text.toString());
  return true;
}
/**
 * Propagates the smallest vertex id to all neighbors. Will always choose to halt and only
 * reactivate if a smaller id has been sent to it.
 *
 * @param messages Iterator of messages from the previous superstep.
 * @throws java.io.IOException
 */
@Override
public void compute(
    Vertex<LongWritable, LongWritable, NullWritable> vertex, Iterable<LongWritable> messages)
    throws IOException {
  long currentComponent = vertex.getValue().get();

  // First superstep is special, because we can simply look at the neighbors
  if (getSuperstep() == 0) {
    for (Edge<LongWritable, NullWritable> edge : vertex.getEdges()) {
      long neighbor = edge.getTargetVertexId().get();
      if (neighbor < currentComponent) {
        currentComponent = neighbor;
      }
    }
    // Only need to send value if it is not the own id
    if (currentComponent != vertex.getValue().get()) {
      vertex.setValue(new LongWritable(currentComponent));
      for (Edge<LongWritable, NullWritable> edge : vertex.getEdges()) {
        LongWritable neighbor = edge.getTargetVertexId();
        if (neighbor.get() > currentComponent) {
          sendMessage(neighbor, vertex.getValue());
        }
      }
    }
    vertex.voteToHalt();
    return;
  }

  boolean changed = false;
  // did we get a smaller id?
  for (LongWritable message : messages) {
    long candidateComponent = message.get();
    if (candidateComponent < currentComponent) {
      currentComponent = candidateComponent;
      changed = true;
    }
  }

  // propagate new component id to the neighbors
  if (changed) {
    vertex.setValue(new LongWritable(currentComponent));
    sendMessageToAllEdges(vertex, vertex.getValue());
  }
  vertex.voteToHalt();
}
@Override
protected void map(LongWritable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  if (Long.compareUnsigned(key.get(), start) >= 0 && Long.compareUnsigned(key.get(), end) <= 0) {
    // It is assumed that the passed BytesWritable value is always a *single* PacketInfo object.
    // Passing more than 1 object will result in the whole set being passed through if any pass
    // the filter. We cannot serialize PacketInfo objects back to byte arrays, otherwise we could
    // support more than one packet.
    // Note: short-circuit findAny() func on stream
    boolean send = filteredPacketInfo(value).findAny().isPresent();
    if (send) {
      context.write(key, value);
    }
  }
}
@Override
protected void reduce(Text category, Iterable<LongWritable> ones, Context context)
    throws IOException, InterruptedException {
  long sum = 0L;
  for (LongWritable one : ones) {
    sum += one.get();
  }
  context.write(category, new LongWritable(sum));
}