@SuppressWarnings({"unchecked", "rawtypes"}) public void start( Path outputDir, int numReducers, boolean concurrent, String accessKey, String secretKey) throws GoraException, IOException, Exception { LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers); // DataStore<Long,CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, new // Configuration()); auth = new BasicAWSCredentials(accessKey, secretKey); DataStore<Long, cidynamonode> store = WSDataStoreFactory.createDataStore( DynamoDBStore.class, DynamoDBKey.class, cidynamonode.class, auth); job = new Job(getConf()); if (!job.getConfiguration() .get("io.serializations") .contains("org.apache.hadoop.io.serializer.JavaSerialization")) { job.getConfiguration() .set( "io.serializations", job.getConfiguration().get("io.serializations") + ",org.apache.hadoop.io.serializer.JavaSerialization"); } job.setJobName("Link Verifier"); job.setNumReduceTasks(numReducers); job.setJarByClass(getClass()); Query query = store.newQuery(); // if (!concurrent) { // no concurrency filtering, only need prev field // query.setFields("prev"); // } else { // readFlushed(job.getCon figuration()); // } GoraMapper.initMapperJob( job, query, store, DynamoDBKey.class, VLongWritable.class, VerifyMapper.class, true); job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false); job.setReducerClass(VerifyReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); store.close(); job.submit(); }
/**
 * Initializes the task and loads the optional {@code goraci.verify.flushed} configuration
 * entries into {@code flushed}.
 *
 * <p>Each entry is split on {@code ':'}; the first part becomes the {@link Utf8} key and the
 * second part is parsed as a {@code long} value. When the property is absent or empty,
 * {@code flushed} is left untouched.
 *
 * @param context task context supplying the job configuration
 * @throws IOException if the superclass setup fails
 * @throws InterruptedException if the superclass setup is interrupted
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  String[] flushedEntries = context.getConfiguration().getStrings("goraci.verify.flushed");
  if (flushedEntries == null || flushedEntries.length == 0) {
    // Nothing configured: keep whatever value 'flushed' already has.
    return;
  }

  // Assign the field before parsing so it is populated incrementally, entry by entry.
  flushed = new HashMap<Utf8, Long>();
  for (int i = 0; i < flushedEntries.length; i++) {
    String[] parts = flushedEntries[i].split(":");
    Utf8 key = new Utf8(parts[0]);
    long value = Long.parseLong(parts[1]);
    flushed.put(key, value);
  }
}