Example #1
0
  /**
   * Configures the "Link Verifier" MapReduce job over the DynamoDB-backed store and submits it.
   *
   * <p>Steps, in order: build AWS credentials from the given keys, create the data store, create
   * the job from {@code getConf()}, make sure Java serialization is listed under
   * {@code io.serializations}, wire the mapper through {@code GoraMapper.initMapperJob}, disable
   * speculative map execution, set the reducer and text output, close the store, and finally call
   * {@code job.submit()}.
   *
   * <p>The store is closed in a {@code finally} block so it is released even when job setup fails.
   *
   * @param outputDir path passed to {@code TextOutputFormat.setOutputPath} as the job output
   * @param numReducers number of reduce tasks to configure on the job
   * @param concurrent currently unused by this method; the concurrency-specific query setup is
   *     disabled
   * @param accessKey AWS access key used to build the credentials
   * @param secretKey AWS secret key used to build the credentials
   * @throws GoraException declared by the signature; presumably raised by store creation or
   *     query setup (confirm against the Gora API in use)
   * @throws IOException declared by the signature; presumably raised by job creation or
   *     submission (confirm against the Hadoop API in use)
   * @throws Exception any other failure propagated from the store, job setup or submission
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  public void start(
      Path outputDir, int numReducers, boolean concurrent, String accessKey, String secretKey)
      throws GoraException, IOException, Exception {
    LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

    auth = new BasicAWSCredentials(accessKey, secretKey);

    DataStore<Long, cidynamonode> store =
        WSDataStoreFactory.createDataStore(
            DynamoDBStore.class, DynamoDBKey.class, cidynamonode.class, auth);

    try {
      job = new Job(getConf());

      // Register Java serialization exactly once. An unset property is treated as empty so the
      // lookup cannot throw a NullPointerException and no leading comma is written.
      // NOTE(review): when the property is unset only JavaSerialization is written here; confirm
      // that later setup registers any other serializers the job needs.
      final String javaSerialization = "org.apache.hadoop.io.serializer.JavaSerialization";
      String serializations = job.getConfiguration().get("io.serializations");
      if (serializations == null) {
        serializations = "";
      }
      if (!serializations.contains(javaSerialization)) {
        String updated =
            serializations.isEmpty()
                ? javaSerialization
                : serializations + "," + javaSerialization;
        job.getConfiguration().set("io.serializations", updated);
      }

      job.setJobName("Link Verifier");
      job.setNumReduceTasks(numReducers);
      job.setJarByClass(getClass());

      // The query is left unrestricted: the 'concurrent' flag is not consulted here, so no field
      // filtering or flushed-state loading is applied.
      Query query = store.newQuery();

      GoraMapper.initMapperJob(
          job, query, store, DynamoDBKey.class, VLongWritable.class, VerifyMapper.class, true);

      job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

      job.setReducerClass(VerifyReducer.class);
      job.setOutputFormatClass(TextOutputFormat.class);
      TextOutputFormat.setOutputPath(job, outputDir);
    } finally {
      // Close the store on both success and failure; on success this still happens before the
      // job is submitted, as in the original flow.
      store.close();
    }

    job.submit();
  }
Example #2
0
    /**
     * Loads the optional "flushed" table from the job configuration before mapping starts.
     *
     * <p>The string list stored under {@code goraci.verify.flushed} is read from the context's
     * configuration. When it is present and non-empty, {@code flushed} is replaced by a new map
     * and each entry is split on {@code ':'}: the first piece becomes the {@code Utf8} key and
     * the second piece is parsed as a {@code long} value. When the property is missing or empty,
     * {@code flushed} is left untouched.
     *
     * @param context task context supplying the job configuration
     * @throws IOException declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);

      final String[] flushedEntries =
          context.getConfiguration().getStrings("goraci.verify.flushed");

      // Nothing configured: keep whatever value 'flushed' already holds.
      if (flushedEntries == null || flushedEntries.length == 0) {
        return;
      }

      flushed = new HashMap<Utf8, Long>();
      for (int i = 0; i < flushedEntries.length; i++) {
        final String[] parts = flushedEntries[i].split(":");
        final Utf8 key = new Utf8(parts[0]);
        final long value = Long.parseLong(parts[1]);
        flushed.put(key, value);
      }
    }