/**
 * Obtains the data request declaring which input columns the producer needs.
 *
 * @return the non-empty kiji data request supplied by the producer.
 * @throws JobConfigurationException if the producer supplies an empty data request.
 */
public KijiDataRequest getDataRequest() {
  final KijiDataRequest request = mProducer.getDataRequest();
  if (!request.isEmpty()) {
    return request;
  }
  // An empty request means there is nothing to read; treat it as a configuration error.
  final String producerName = mProducer.getClass().getName();
  throw new JobConfigurationException(
      producerName + " returned an empty KijiDataRequest, which is not allowed.");
}
@Test public void testJSONBulkImporter() throws Exception { // Prepare input file: File inputFile = File.createTempFile("TestJSONImportInput", ".txt", getLocalTempDir()); TestingResources.writeTextFile( inputFile, TestingResources.get(BulkImporterTestUtils.JSON_IMPORT_DATA)); Configuration conf = getConf(); conf.set( DescribedInputTextBulkImporter.CONF_FILE, BulkImporterTestUtils.localResource(BulkImporterTestUtils.FOO_IMPORT_DESCRIPTOR)); // Run the bulk-import: final KijiMapReduceJob job = KijiBulkImportJobBuilder.create() .withConf(conf) .withBulkImporter(JSONBulkImporter.class) .withInput(MapReduceJobInputs.newTextMapReduceJobInput(new Path(inputFile.toString()))) .withOutput(new DirectKijiTableMapReduceJobOutput(mTable.getURI())) .build(); assertTrue(job.run()); final Counters counters = job.getHadoopJob().getCounters(); assertEquals( 3, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_PROCESSED).getValue()); assertEquals( 1, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE).getValue()); assertEquals( 0, counters.findCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_REJECTED).getValue()); // Validate output: final KijiRowScanner scanner = mReader.getScanner(KijiDataRequest.create("info")); BulkImporterTestUtils.validateImportedRows(scanner, false); scanner.close(); }
@Override public KijiDataRequest getDataRequest() { KijiDataRequestBuilder builder = KijiDataRequest.builder(); KijiDataRequestBuilder.ColumnsDef def = builder.newColumnsDef(); def.withMaxVersions(1); // Everything def.add(new KijiColumnName("data", "player_data")) .add(new KijiColumnName("data", "dire_towers_status")) .add(new KijiColumnName("data", "radiant_towers_status")) .add(new KijiColumnName("data", "dire_barracks_status")) .add(new KijiColumnName("data", "radiant_barracks_status")) .add(new KijiColumnName("data", "cluster")) .add(new KijiColumnName("data", "season")) .add(new KijiColumnName("data", "game_mode")) .add(new KijiColumnName("data", "match_seq_num")) .add(new KijiColumnName("data", "league_id")) .add(new KijiColumnName("data", "first_blood_time")) .add(new KijiColumnName("data", "negative_votes")) .add(new KijiColumnName("data", "duration")) .add(new KijiColumnName("data", "radiant_win")) .add(new KijiColumnName("data", "positive_votes")) .add(new KijiColumnName("data", "lobby_type")) .add(new KijiColumnName("data", "human_players")); return builder.addColumns(def).build(); }
@Test public void testBuilder() throws Exception { final KijiTableLayout layout = KijiTableLayout.newLayout(KijiTableLayouts.getLayout(KijiTableLayouts.SIMPLE)); final Kiji kiji = new InstanceBuilder() .withTable("table", layout) .withRow("row1") .withFamily("family") .withQualifier("column") .withValue(1, "foo1") .withValue(2, "foo2") .withRow("row2") .withFamily("family") .withQualifier("column") .withValue(100, "foo3") .build(); final KijiTable table = kiji.openTable("table"); final KijiTableReader reader = table.openTableReader(); // Verify the first row. final KijiDataRequest req = KijiDataRequest.create("family", "column"); final KijiRowData row1 = reader.get(table.getEntityId("row1"), req); assertEquals("foo2", row1.getValue("family", "column", 2).toString()); // Verify the second row. final KijiRowData row2 = reader.get(table.getEntityId("row2"), req); assertEquals("foo3", row2.getValue("family", "column", 100).toString()); ResourceUtils.closeOrLog(reader); ResourceUtils.releaseOrLog(table); ResourceUtils.releaseOrLog(kiji); }
private <T> T populateFromRow( EntitySpec<T> spec, T entity, long startTime, long endTime, Object... entityIdComponents) throws IOException { // TODO: Use a pool of tables and/or table readers final KijiTable table = mKiji.openTable(spec.getTableName()); try { final KijiTableReader reader = table.openTableReader(); try { final KijiDataRequestBuilder builder = KijiDataRequest.builder(); builder.withTimeRange(startTime, endTime); spec.populateColumnRequests(builder); final KijiDataRequest dataRequest = builder.build(); final EntityId entityId = table.getEntityId(entityIdComponents); final KijiRowData row = reader.get(entityId, dataRequest); try { return spec.populateEntityFromRow(entity, row); } catch (IllegalAccessException iae) { throw new RuntimeException(iae); } } finally { reader.close(); } } finally { table.release(); } }
/** A test to ensure that policies can mask the key value stores of their producers. */ @Test public void testKVMasking() throws IOException { // Create a freshness policy that knows where to find the text file backed kv-store. KijiFreshnessPolicy policy = new ShadowingFreshening("file:" + new File(getLocalTempDir(), KV_FILENAME)); // Install a freshness policy. KijiFreshnessManager manager = KijiFreshnessManager.create(getKiji()); try { manager.registerFreshener( "user", new KijiColumnName("info", "name"), policy, new UnconfiguredScoreFunction(), Collections.<String, String>emptyMap(), true, false); } finally { manager.close(); } final KijiTable userTable = getKiji().openTable("user"); try { final FreshKijiTableReader reader = FreshKijiTableReader.Builder.create().withTable(userTable).withTimeout(10000).build(); try { // Read from the table to ensure that the user name is updated. KijiRowData data = reader.get(userTable.getEntityId("felix"), KijiDataRequest.create("info", "name")); assertEquals("Old Gumbie Cat", data.getMostRecentValue("info", "name").toString()); } finally { reader.close(); } } finally { userTable.release(); } }
/** A test to make sure that producers run inside of freshening can access key value stores. */ @Test public void testSimpleKVStore() throws IOException { final String path = new Path("file:" + new File(getLocalTempDir(), KV_FILENAME)).toString(); final Map<String, String> params = Maps.newHashMap(); params.put(SimpleKVScoreFunction.PARAMETER_KEY, path); // Install a freshness policy. KijiFreshnessManager manager = KijiFreshnessManager.create(getKiji()); try { manager.registerFreshener( "user", new KijiColumnName("info", "name"), AlwaysFreshen.class.getName(), SimpleKVScoreFunction.class.getName(), params, true, false, false); } finally { manager.close(); } final KijiTable userTable = getKiji().openTable("user"); try { final FreshKijiTableReader reader = FreshKijiTableReader.Builder.create().withTable(userTable).withTimeout(10000).build(); try { // Read from the table to ensure that the user name is updated. KijiRowData data = reader.get(userTable.getEntityId("felix"), KijiDataRequest.create("info", "name")); assertEquals("Railway Cat", data.getMostRecentValue("info", "name").toString()); } finally { reader.close(); } } finally { userTable.release(); } }
@Test public void testKVStoreInIsFresh() throws IOException { // Create a freshness policy that knows where to find the text file backed kv-store. KijiFreshnessPolicy policy = new KVStoreInIsFreshPolicy("file:" + new File(getLocalTempDir(), KV_FILENAME)); // Install a freshness policy. KijiFreshnessManager manager = KijiFreshnessManager.create(getKiji()); try { manager.registerFreshener( "user", new KijiColumnName("info", "name"), policy, new UnconfiguredScoreFunction(), Collections.<String, String>emptyMap(), true, false); } finally { manager.close(); } KijiTable userTable = null; FreshKijiTableReader freshReader = null; try { userTable = getKiji().openTable("user"); freshReader = FreshKijiTableReader.Builder.create().withTable(userTable).withTimeout(10000).build(); // Read from the table to ensure that the user name is updated. KijiRowData data = freshReader.get(userTable.getEntityId("felix"), KijiDataRequest.create("info", "name")); // IsFresh should have returned true, so nothing should be written. assertEquals("Felis", data.getMostRecentValue("info", "name").toString()); } finally { ResourceUtils.closeOrLog(freshReader); ResourceUtils.releaseOrLog(userTable); } }
/** * Builder for KijiMapReduceJobs which run ScoreFunction implementations across all rows of a table. * * <p>A ScoreFunction MapReduce job runs a ScoreFunction against all rows within the specified range * of a table. It runs the ScoreFunction as if it was attached with an {@link * org.kiji.scoring.lib.AlwaysFreshen} policy which provides no additional parameters or * KeyValueStores. * * <p>ScoreFunction MapReduce jobs require that all information available to a ScoreFunction via the * FreshenerContext be specified during construction of the job. This information includes: * * <ul> * <li>attached column (normally this would be the column where the Freshener is attached) * <li>string-string parameter mapping (defaults to an empty map) * <li>client data request (normally this would be the request which triggered the run of the * Freshener) (defaults to an empty data request) * <li>KeyValueStores will be constructed from the return value of the ScoreFunction's * getRequiredStores method optionally overridden by KeyValueStores specified to {@link * #withKeyValueStoreOverrides(java.util.Map)}. This optional overriding makes up for the lack * of overrides normally provided by the KijiFreshnessPolicy. 
(defaults to an empty map) * </ul> * * <p>Example usage: * * <pre> * final KijiMapReduceJob sfJob = ScoreFunctionJobBuilder.create() * .withConf(conf) * .withInputTable(inputTableURI) * .withAttachedColumn(new KijiColumnName("family:qualifier")) * .withScoreFunctionClass(MyScoreFunction.class) * .withOutput(MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(inputTableURI)) * .build(); * sfJob.run(); * </pre> */ public final class ScoreFunctionJobBuilder extends KijiTableInputJobBuilder<ScoreFunctionJobBuilder> { public static final String SCORE_FUNCTION_CLASS_CONF_KEY = "org.kiji.scoring.batch.ScoreFunctionJobBuilder.sf_class_conf_key"; public static final String SCORE_FUNCTION_PARAMETERS_CONF_KEY = "org.kiji.scoring.batch.ScoreFunctionJobBuilder.sf_parameters_conf_key"; public static final String SCORE_FUNCTION_ATTACHED_COLUMN_CONF_KEY = "org.kiji.scoring.batch.ScoreFunctionJobBuilder.sf_attached_column_conf_key"; public static final String SCORE_FUNCTION_CLIENT_DATA_REQUEST_CONF_KEY = "org.kiji.scoring.batch.ScoreFunctionJobBuilder.sf_client_data_request_conf_key"; private static final Gson GSON = new Gson(); private static final KijiDataRequest DEFAULT_CLIENT_REQUEST = KijiDataRequest.builder().build(); private static final Map<String, String> DEFAULT_PARAMETERS = Maps.newHashMap(); private static final int DEFAULT_NUM_THREADS_PER_MAPPER = 1; private Class<? 
extends ScoreFunction<?>> mScoreFunctionClass = null; private KijiTableMapReduceJobOutput mJobOutput = null; private ScoreFunction<?> mScoreFunction = null; private KijiMapper<?, ?, ?, ?> mMapper = null; private KijiReducer<?, ?, ?, ?> mReducer = null; private KijiDataRequest mScoreFunctionDataRequest = null; private int mNumThreadsPerMapper = DEFAULT_NUM_THREADS_PER_MAPPER; private KijiDataRequest mClientDataRequest = null; private KijiColumnName mAttachedColumn = null; private Map<String, String> mParameters = null; private Map<String, KeyValueStore<?, ?>> mKeyValueStoreOverrides = null; /** Private constructor. Use {@link #create()}. */ private ScoreFunctionJobBuilder() {} /** * Create a new ScoreFunctionJobBuilder. * * @return a new ScoreFunctionJobBuilder. */ public static ScoreFunctionJobBuilder create() { return new ScoreFunctionJobBuilder(); } /** * Configure the Job to run the given ScoreFunction implementation to generate scores. * * @param scoreFunctionClass class of the ScoreFunction implementation with which to generate * scores. * @return this builder. */ public ScoreFunctionJobBuilder withScoreFunctionClass( final Class<? extends ScoreFunction<?>> scoreFunctionClass) { mScoreFunctionClass = scoreFunctionClass; return this; } /** * Configure the Job to output using the given KijiTableMapReduceJobOutput. The output table must * match the input table. * * @param jobOutput KijiTableMapReduceJobOutput which defines the output from this mapreduce job. * @return this builder. 
*/ public ScoreFunctionJobBuilder withOutput(final KijiTableMapReduceJobOutput jobOutput) { mJobOutput = jobOutput; return super.withOutput(jobOutput); } /** {@inheritDoc} */ @Override public ScoreFunctionJobBuilder withOutput(final MapReduceJobOutput jobOutput) { if (jobOutput instanceof KijiTableMapReduceJobOutput) { return withOutput((KijiTableMapReduceJobOutput) jobOutput); } else { throw new RuntimeException( "jobOutput parameter of ScoreFunctionJobBuilder.withOutput() must " + "be a KijiTableMapReduceJobOutput."); } } /** * Sets the number of threads to use for running the ScoreFunction in parallel. * * <p>You may use this setting to run multiple instances of your ScoreFunction in parallel within * each map task of the job. This may be useful for increasing your throughput when your * ScoreFunction is not CPU bound. * * @param numThreads the number of ScoreFunctions which will be run in parallel per mapper. * @return this builder. */ public ScoreFunctionJobBuilder withNumThreadsPerMapper(final int numThreads) { Preconditions.checkArgument(0 < numThreads, "numThreads must be positive, got %d", numThreads); mNumThreadsPerMapper = numThreads; return this; } /** * Configure the Job to include the given client data request. This request will be visible to the * ScoreFunction via {@link org.kiji.scoring.FreshenerContext#getClientRequest()}. If unspecified, * an empty data request will be used. * * @param clientDataRequest KijiDataRequest which will be visible to the ScoreFunction. * @return this builder. */ public ScoreFunctionJobBuilder withClientDataRequest(final KijiDataRequest clientDataRequest) { mClientDataRequest = clientDataRequest; return this; } /** * Configure the Job to include the given attached column. This column will be visible to the * ScoreFunction via {@link org.kiji.scoring.FreshenerContext#getAttachedColumn()} and will be * used as the output column for values written by the ScoreFunction. 
The schema of this column * should be compatible with the schema of values output by the ScoreFunction. * * @param attachedColumn column to which to write ScoreFunction return values. * @return this builder. */ public ScoreFunctionJobBuilder withAttachedColumn(final KijiColumnName attachedColumn) { mAttachedColumn = attachedColumn; return this; } /** * Configure the Job to include the given parameters. These parameters should be the equivalent of * merging request and attachment time parameters from the real time execution of a Freshener. * * @param parameters parameters which will be available to the ScoreFunction via the * FreshenerContext. * @return this builder. */ public ScoreFunctionJobBuilder withParameters(final Map<String, String> parameters) { mParameters = parameters; return this; } /** * Configures the Job to use the given KeyValueStores in preference to those requested by the * ScoreFunction when there are name conflicts. This mirrors the KeyValueStore override behavior * provided by a KijiFreshnessPolicy. These KeyValueStores will only replace KeyValueStores * requested by the ScoreFunction with the same name. KeyValueStores requested by the * ScoreFunction whose names are not shadowed in this map will be available as normal. * * @param kvStoreOverrides KeyValueStores which will take precedence over stores requested by the * ScoreFunction. * @return this builder. 
*/ public ScoreFunctionJobBuilder withKeyValueStoreOverrides( final Map<String, KeyValueStore<?, ?>> kvStoreOverrides) { mKeyValueStoreOverrides = kvStoreOverrides; return this; } /** {@inheritDoc} */ @Override protected void configureJob(final Job job) throws IOException { if (null == mScoreFunctionClass) { throw new JobConfigurationException("Must specify a ScoreFunction class."); } if (null == mClientDataRequest) { mClientDataRequest = DEFAULT_CLIENT_REQUEST; } if (null == mAttachedColumn) { throw new JobConfigurationException("Must specified an AttachedColumn."); } if (null == mParameters) { mParameters = DEFAULT_PARAMETERS; } final Configuration conf = job.getConfiguration(); conf.setClass(SCORE_FUNCTION_CLASS_CONF_KEY, mScoreFunctionClass, ScoreFunction.class); if (!getInputTableURI().equals(mJobOutput.getOutputTableURI())) { throw new JobConfigurationException( String.format( "Output table must be the same as the input" + "table. Got input: %s output: %s", getInputTableURI(), mJobOutput.getOutputTableURI())); } conf.set(SCORE_FUNCTION_ATTACHED_COLUMN_CONF_KEY, mAttachedColumn.getName()); conf.set(SCORE_FUNCTION_PARAMETERS_CONF_KEY, GSON.toJson(mParameters, Map.class)); conf.set( SCORE_FUNCTION_CLIENT_DATA_REQUEST_CONF_KEY, Base64.encodeBase64String(SerializationUtils.serialize(mClientDataRequest))); mMapper = new ScoreFunctionMapper(); mReducer = new IdentityReducer<Object, Object>(); job.setJobName("Kiji ScoreFunction: " + mScoreFunctionClass.getSimpleName()); mScoreFunction = ReflectionUtils.newInstance(mScoreFunctionClass, conf); final FreshenerContext context = InternalFreshenerContext.create( mClientDataRequest, mAttachedColumn, mParameters, Maps.<String, String>newHashMap(), KeyValueStoreReaderFactory.create(getRequiredStores())); mScoreFunctionDataRequest = mScoreFunction.getDataRequest(context); super.configureJob(job); } /** {@inheritDoc} */ @Override protected void configureMapper(final Job job) throws IOException { super.configureMapper(job); if 
(mNumThreadsPerMapper > 1) { @SuppressWarnings("unchecked") Class<? extends Mapper<EntityId, KijiRowData, Object, Object>> childMapperClass = (Class<? extends Mapper<EntityId, KijiRowData, Object, Object>>) mMapper.getClass(); KijiMultithreadedMapper.setMapperClass(job, childMapperClass); KijiMultithreadedMapper.setNumberOfThreads(job, mNumThreadsPerMapper); job.setMapperClass(KijiMultithreadedMapper.class); } } /** {@inheritDoc} */ @Override protected Map<String, KeyValueStore<?, ?>> getRequiredStores() { final FreshenerContext context = InternalFreshenerContext.create(mAttachedColumn, mParameters); final Map<String, KeyValueStore<?, ?>> combinedStores = Maps.newHashMap(); combinedStores.putAll(mScoreFunction.getRequiredStores(context)); if (null != mKeyValueStoreOverrides) { combinedStores.putAll(mKeyValueStoreOverrides); } return combinedStores; } /** {@inheritDoc} */ @Override protected KijiDataRequest getDataRequest() { return mScoreFunctionDataRequest; } /** {@inheritDoc} */ @Override protected KijiMapReduceJob build(final Job job) { return KijiMapReduceJob.create(job); } /** {@inheritDoc} */ @Override protected KijiMapper<?, ?, ?, ?> getMapper() { return mMapper; } /** {@inheritDoc} */ @Override protected KijiReducer<?, ?, ?, ?> getCombiner() { return null; } /** {@inheritDoc} */ @Override protected KijiReducer<?, ?, ?, ?> getReducer() { return mReducer; } /** {@inheritDoc} */ @Override protected Class<?> getJarClass() { return mScoreFunctionClass; } }
/**
 * Supplies a data request built without any columns added.
 *
 * @param context freshener context; not consulted by this implementation.
 * @return a data request built from an unconfigured builder.
 */
@Override
public KijiDataRequest getDataRequest(FreshenerContext context) {
  final KijiDataRequest emptyRequest = KijiDataRequest.builder().build();
  return emptyRequest;
}