public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  @SuppressWarnings("deprecation")
  FileSystem hadoopFs = FileUtil.getFileSystem(conf, AccumuloConfiguration.getSiteConfiguration());
  FileSystem localFs = FileSystem.getLocal(conf);
  Opts opts = new Opts();
  opts.parseArgs(PrintInfo.class.getName(), args);
  if (opts.files.isEmpty()) {
    System.err.println("No files were given");
    System.exit(-1);
  }

  long[] countBuckets = new long[11];
  long[] sizeBuckets = new long[countBuckets.length];
  long totalSize = 0;

  for (String arg : opts.files) {
    Path path = new Path(arg);
    FileSystem fs = hadoopFs.exists(path) ? hadoopFs : localFs; // fall back to local
    CachableBlockFile.Reader _rdr = new CachableBlockFile.Reader(fs, path, conf, null, null);
    Reader iter = new RFile.Reader(_rdr);

    iter.printInfo();
    System.out.println();
    org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] {arg});

    if (opts.histogram || opts.dump) {
      iter.seek(new Range((Key) null, (Key) null), new ArrayList<ByteSequence>(), false);

      while (iter.hasTop()) {
        Key key = iter.getTopKey();
        Value value = iter.getTopValue();
        if (opts.dump)
          System.out.println(key + " -> " + value);
        if (opts.histogram) {
          long size = key.getSize() + value.getSize();
          int bucket = (int) Math.log10(size);
          countBuckets[bucket]++;
          sizeBuckets[bucket] += size;
          totalSize += size;
        }
        iter.next();
      }
    }

    iter.close();

    if (opts.histogram) {
      System.out.println("Up to size      count      %-age");
      for (int i = 1; i < countBuckets.length; i++) {
        System.out.println(String.format("%11.0f : %10d %6.2f%%", Math.pow(10, i),
            countBuckets[i], sizeBuckets[i] * 100. / totalSize));
      }
    }
  }
}
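// Standalone illustration (class and sample values invented, not from the source)
// of the log10 bucketing used for the histogram above: bucket i collects
// key-value pairs whose combined size s satisfies 10^i <= s < 10^(i+1).
public class HistogramBucketDemo {
  public static void main(String[] args) {
    long[] sampleSizes = {7, 42, 850, 12_000};
    for (long size : sampleSizes) {
      int bucket = (int) Math.log10(size); // 7 -> 0, 42 -> 1, 850 -> 2, 12000 -> 4
      System.out.printf("size=%d -> bucket %d%n", size, bucket);
    }
  }
}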
@Override
protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
  try {
    // Each value should carry the row of the previously seen key, and the
    // current row/value pair should encode consecutive counters.
    if (key != null)
      assertEquals(key.getRow().toString(), new String(v.get()));
    assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
    assertEquals(new String(v.get()), String.format("%09x", count));
  } catch (AssertionError e) {
    // Record the first failure instead of throwing from inside the framework call
    e1 = e;
  }
  key = new Key(k);
  count++;
}
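// Hedged sketch (not from the original source) of the companion cleanup hook
// such a test mapper presumably needs: since map() only records the first
// AssertionError in the e1 field, something must rethrow it after the map
// phase or the task would be reported as successful despite bad data.
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  if (e1 != null) {
    throw new IOException("verification failed", e1);
  }
}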
public static VLongWritable getWritable(final Value v) {
  if (null == v) {
    throw new IllegalArgumentException("Value cannot be null");
  }
  ByteArrayInputStream bais = new ByteArrayInputStream(v.get());
  DataInputStream in = new DataInputStream(bais);
  VLongWritable writable = new VLongWritable();
  try {
    writable.readFields(in);
  } catch (IOException e) {
    // If this ever happens, something seriously screwed up is happening, or someone
    // subclassed Value and made it do crazy stuff.
    throw new RuntimeException(e);
  }
  return writable;
}
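// Hedged sketch of the inverse helper (hypothetical, not in the original source):
// packs a long into an Accumulo Value using the same VLongWritable encoding that
// getWritable() reads back. Assumes java.io.ByteArrayOutputStream and
// java.io.DataOutputStream are imported alongside the classes used above.
public static Value getValue(long l) {
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(baos);
  VLongWritable writable = new VLongWritable(l);
  try {
    writable.write(out);
  } catch (IOException e) {
    // Writing to an in-memory stream should never fail
    throw new RuntimeException(e);
  }
  return new Value(baos.toByteArray());
}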
public void map(Key keyIn, Value valueIn, Context context) throws IOException, InterruptedException {
  // Only process records containing tweet text
  if (keyIn.getColumnFamily().equals(new Text("text"))) {
    int docPartition = r.nextInt(MAX_PARTITION);

    // Split the text into tokens
    String[] tokens = valueIn.toString().split(splitRegex);

    // Process each word and add it as a key with its tweet ID as a value
    for (String token : tokens) {
      // Omit zero-length tokens and tokens only containing special characters
      if (token.length() != 0 && !token.matches("[\\W]*")) {
        // Filter some of the garbage
        if (!token.matches(urlRegex)) {
          if (token.matches(tweetRegex)) {
            token = token.replaceAll(tweetRegex, "$1");
          } else {
            token = token.replaceAll(uglyTextRegex, "$1");
          }
        }
        // Set the outgoing key and value
        keyOut.set(Integer.toString(docPartition));
        String colFam = token.toLowerCase();
        String colQual = keyIn.getRow().toString();
        valueOut.set(colFam + ":" + colQual);
        // Create an output Key/Value pair
        context.write(keyOut, valueOut);
      }
    }
  }
}
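// Standalone sketch of the token guard used above; only the [\W]* check comes
// from the mapper, the class name and sample tokens are invented for illustration.
public class TokenFilterDemo {
  public static void main(String[] args) {
    String[] tokens = {"hello", "", "!!!", "@user42"};
    for (String token : tokens) {
      // Same guard as the mapper: drop empty tokens and tokens made up
      // entirely of non-word characters.
      boolean keep = token.length() != 0 && !token.matches("[\\W]*");
      System.out.println("'" + token + "' -> " + (keep ? "kept" : "dropped"));
    }
  }
}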
@Override
public void map(Key ik, Value iv, Context context) {
  NNMetadata metadata = NNMetadata.inflate(iv.toString(), ik.getRow().toString());
  int inputNeuronCount =
      BinaryUtils.toBinary(metadata.getInputMax(), new double[] {metadata.getInputMax()}, true).length;
  int num =
      BinaryUtils.toBinary(metadata.getOutputMax(), new double[] {metadata.getOutputMax()}, false).length;
  int categories = metadata.getOutputNameFields().size();

  ClassificationNetworkConf conf = new ClassificationNetworkConf();
  conf.setInputMax(metadata.getInputMax());
  conf.setOutputMax(metadata.getOutputMax());
  conf.setInputActivation(null);
  conf.setInputBias(true);
  conf.setInputNeuronCount(inputNeuronCount);
  conf.setHiddenActiviation(new ActivationSigmoid());
  conf.setHiddenBias(true);
  // Was "2 ^ categories": in Java ^ is bitwise XOR, not exponentiation, so that
  // computed the wrong count. Use a left shift to get 2 raised to `categories`.
  conf.setHiddenNeuronCount(1 << categories);
  conf.setOutputActivation(new ActivationSigmoid());
  conf.setOutputNeuronCount(num);
  conf.setNumberOfCategories(categories); // FIXME: This is bogus now
  conf.setBasicMLInput(this.getRandomArray(inputNeuronCount)); // FIXME: This is bogus now
  conf.setBasicIdealMLOutput(this.getRandomArray(num)); // FIXME: This is bogus now

  try {
    NNProcessor processor = NNProcessorFactory.getProcessorBean(conf);
    processor.constructNetworks(metadata.getArtifactId());
  } catch (RepositoryException e) {
    String gripe = "Access to the Repository Services died";
    log.log(Level.SEVERE, gripe, e);
    throw new StopMapperException(gripe, e);
  }
}
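// Tiny demonstration (invented class, not from the source) of why the original
// "2 ^ categories" was a bug: in Java ^ is bitwise XOR, so 2 ^ 3 evaluates to 1
// rather than 8.
public class PowVsXorDemo {
  public static void main(String[] args) {
    int categories = 3;
    System.out.println(2 ^ categories);                // XOR: prints 1
    System.out.println(1 << categories);               // 2 to the 3rd: prints 8
    System.out.println((int) Math.pow(2, categories)); // also prints 8
  }
}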