Example #1
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    @SuppressWarnings("deprecation")
    FileSystem hadoopFs =
        FileUtil.getFileSystem(conf, AccumuloConfiguration.getSiteConfiguration());
    FileSystem localFs = FileSystem.getLocal(conf);
    Opts opts = new Opts();
    opts.parseArgs(PrintInfo.class.getName(), args);
    if (opts.files.isEmpty()) {
      System.err.println("No files were given");
      System.exit(-1);
    }

    long[] countBuckets = new long[11];
    long[] sizeBuckets = new long[countBuckets.length];
    long totalSize = 0;

    for (String arg : opts.files) {

      Path path = new Path(arg);
      FileSystem fs = hadoopFs.exists(path) ? hadoopFs : localFs; // fall back to local
      CachableBlockFile.Reader _rdr = new CachableBlockFile.Reader(fs, path, conf, null, null);
      Reader iter = new RFile.Reader(_rdr);

      iter.printInfo();
      System.out.println();
      org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] {arg});

      if (opts.histogram || opts.dump) {
        iter.seek(new Range((Key) null, (Key) null), new ArrayList<ByteSequence>(), false);
        while (iter.hasTop()) {
          Key key = iter.getTopKey();
          Value value = iter.getTopValue();
          if (opts.dump) System.out.println(key + " -> " + value);
          if (opts.histogram) {
            long size = key.getSize() + value.getSize();
            int bucket = (int) Math.log10(size);
            countBuckets[bucket]++;
            sizeBuckets[bucket] += size;
            totalSize += size;
          }
          iter.next();
        }
      }
      iter.close();
      if (opts.histogram) {
        System.out.println("Up to size      count      %-age");
        for (int i = 1; i < countBuckets.length; i++) {
          System.out.println(
              String.format(
                  "%11.0f : %10d %6.2f%%",
                  Math.pow(10, i), countBuckets[i], sizeBuckets[i] * 100. / totalSize));
        }
      }
    }
  }
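The histogram above buckets each key/value pair by the order of magnitude of its combined size: an entry of S bytes lands in bucket floor(log10(S)). A minimal standalone sketch of that rule (the class name BucketDemo is mine, not part of the example):

public class BucketDemo {
  public static void main(String[] args) {
    long[] sizes = {7, 42, 4_321, 123_456};
    for (long size : sizes) {
      // Same rule as the histogram above: floor(log10(size)) picks the bucket.
      int bucket = (int) Math.log10(size);
      System.out.printf("size %6d -> bucket %d%n", size, bucket);
    }
  }
}

With eleven buckets the histogram covers sizes up to 10^11 bytes; an entry larger than that would index past the array, which the example does not guard against.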

Example #2
  @Override
  protected void map(Key k, Value v, Context context) throws IOException, InterruptedException {
    try {
      // Each row must be the zero-padded hex form of a running counter, and
      // each value must echo the row of the previous key.
      if (key != null) assertEquals(key.getRow().toString(), new String(v.get()));
      assertEquals(k.getRow(), new Text(String.format("%09x", count + 1)));
      assertEquals(new String(v.get()), String.format("%09x", count));
    } catch (AssertionError e) {
      // Remember the first failure; the test inspects it after the job completes.
      e1 = e;
    }
    key = new Key(k);
    count++;
  }
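The mapper relies on fields declared elsewhere in the original test class. A plausible set of declarations, shown only so the snippet reads self-contained (the initial values are my assumptions):

  private Key key = null;                  // previous key seen by the mapper
  private int count = 0;                   // number of entries verified so far
  private static AssertionError e1 = null; // first failure, inspected after the job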

Example #3
  public static VLongWritable getWritable(final Value v) {
    if (null == v) {
      throw new IllegalArgumentException("Value cannot be null");
    }

    ByteArrayInputStream bais = new ByteArrayInputStream(v.get());
    DataInputStream in = new DataInputStream(bais);

    VLongWritable writable = new VLongWritable();
    try {
      writable.readFields(in);
    } catch (IOException e) {
      // If this ever happens, something seriously screwed up is going on, or someone subclassed
      // Value and made it do crazy stuff.
      throw new RuntimeException(e);
    }

    return writable;
  }
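For round-trip context, a sketch of the inverse operation: serializing a VLongWritable back into a Value. It uses the standard Hadoop Writable API and Accumulo's Value(byte[]) constructor; the method name getValue is mine, not part of the original example.

  public static Value getValue(final VLongWritable writable) {
    if (null == writable) {
      throw new IllegalArgumentException("Writable cannot be null");
    }

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);

    try {
      writable.write(out);
    } catch (IOException e) {
      // Writing to an in-memory stream should never fail.
      throw new RuntimeException(e);
    }

    return new Value(baos.toByteArray());
  }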

Example #4
    public void map(Key keyIn, Value valueIn, Context context)
        throws IOException, InterruptedException {

      // Only process records containing tweet text
      if (keyIn.getColumnFamily().equals(new Text("text"))) {

        int docPartition = r.nextInt(MAX_PARTITION);

        // Split the text into tokens
        String[] tokens = valueIn.toString().split(splitRegex);

        // Process each token, emitting the partition as the key and word:tweetID as the value
        for (String token : tokens) {

          // Omit zero-length tokens and tokens containing only special characters
          if (token.length() != 0 && !token.matches("[\\W]*")) {

            // Filter some of the garbage
            if (!token.matches(urlRegex)) {
              if (token.matches(tweetRegex)) {
                token = token.replaceAll(tweetRegex, "$1");
              } else {
                token = token.replaceAll(uglyTextRegex, "$1");
              }
            }

            // Set the outgoing key and value
            keyOut.set(Integer.toString(docPartition));
            String colFam = token.toLowerCase();
            String colQual = keyIn.getRow().toString();
            valueOut.set(colFam + ":" + colQual);

            // Create an output Key Value pair
            context.write(keyOut, valueOut);
          }
        }
      }
    }
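The regex fields (splitRegex, urlRegex, tweetRegex, uglyTextRegex) and the partitioning fields (r, MAX_PARTITION, keyOut, valueOut) are defined elsewhere in the original class. A self-contained sketch of just the token-cleanup pipeline, with illustrative stand-in patterns that may differ from the originals:

public class TokenFilterDemo {
  static final String splitRegex = "\\s+";              // assumption: split on whitespace
  static final String urlRegex = "https?://\\S*";       // assumption: URLs pass through untouched
  static final String tweetRegex = "^[@#](\\w+)\\W*";   // assumption: strip @/# prefixes
  static final String uglyTextRegex = "\\W*(\\w+)\\W*"; // assumption: trim punctuation

  public static void main(String[] args) {
    String tweet = "Check this http://example.com out, @alice! #fun...";
    for (String token : tweet.split(splitRegex)) {
      // Same filtering logic as the mapper above.
      if (token.length() != 0 && !token.matches("[\\W]*")) {
        if (!token.matches(urlRegex)) {
          if (token.matches(tweetRegex)) {
            token = token.replaceAll(tweetRegex, "$1");
          } else {
            token = token.replaceAll(uglyTextRegex, "$1");
          }
        }
        System.out.println(token.toLowerCase());
      }
    }
  }
}

Run as-is, this prints check, this, http://example.com, out, alice, fun: URLs skip the cleanup step but are still emitted, matching the mapper's behavior.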

Example #5
    @Override
    public void map(Key ik, Value iv, Context context) {
      NNMetadata metadata = NNMetadata.inflate(iv.toString(), ik.getRow().toString());
      int inputNeuronCount =
          BinaryUtils.toBinary(metadata.getInputMax(), new double[] {metadata.getInputMax()}, true)
              .length;
      int num =
          BinaryUtils.toBinary(
                  metadata.getOutputMax(), new double[] {metadata.getOutputMax()}, false)
              .length;
      int categories = metadata.getOutputNameFields().size();

      ClassificationNetworkConf conf = new ClassificationNetworkConf();
      conf.setInputMax(metadata.getInputMax());
      conf.setOutputMax(metadata.getOutputMax());
      conf.setInputActivation(null);
      conf.setInputBias(true);
      conf.setInputNeuronCount(inputNeuronCount);

      conf.setHiddenActiviation(new ActivationSigmoid());
      conf.setHiddenBias(true);
      conf.setHiddenNeuronCount(1 << categories); // 2^categories ('^' in Java is XOR, not power)
      conf.setOutputActivation(new ActivationSigmoid());
      conf.setOutputNeuronCount(num);

      conf.setNumberOfCategories(categories); // FIXME:This is bogus now
      conf.setBasicMLInput(this.getRandomArray(inputNeuronCount)); // FIXME:This is bogus now
      conf.setBasicIdealMLOutput(this.getRandomArray(num)); // FIXME:This is bogus now

      try {
        NNProcessor processor = NNProcessorFactory.getProcessorBean(conf);
        processor.constructNetworks(metadata.getArtifactId());
      } catch (RepositoryException e) {
        String gripe = "Access to the Repository Services died";
        log.log(Level.SEVERE, gripe, e);
        throw new StopMapperException(gripe, e);
      }
    }
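The getRandomArray helper is defined elsewhere in the original class; a plausible sketch of it, consistent with the FIXME notes above that the network input and ideal output are placeholder values for now:

    private double[] getRandomArray(int length) {
      // Placeholder vectors only: random doubles standing in for real
      // network input / ideal-output data.
      double[] values = new double[length];
      java.util.Random random = new java.util.Random();
      for (int i = 0; i < length; i++) {
        values[i] = random.nextDouble();
      }
      return values;
    }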