/** A comparator to sort pages ascending by date. */
public static final class SortByDateAscending implements Comparator<Page> {

    /** Delegate for Joda time objects. */
    private final Comparator<Object> jodaCompare = DateTimeComparator.getInstance();

    @Override
    public int compare(final Page o1, final Page o2) {
        return jodaCompare.compare(o1.getPublishingDate(), o2.getPublishingDate());
    }
}
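For context, a small self-contained sketch of what DateTimeComparator.getInstance() accepts; the class name and values below are illustrative only, not part of the project above. The comparator converts anything Joda-Time can read as an instant (DateTime, java.util.Date, Long millis, ISO-8601 strings) before comparing, which is why the delegate field can be declared as Comparator<Object>.

import java.util.Arrays;
import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;

public class DateTimeComparatorDemo {
    public static void main(String[] args) {
        // Illustrative sketch, not project code: mixed instant representations,
        // all comparable by the same DateTimeComparator instance.
        List<Object> instants = Arrays.<Object>asList(
                new DateTime(2014, 3, 1, 12, 0),
                new DateTime(2014, 1, 15, 8, 30),
                "2014-02-01T00:00:00Z");
        instants.sort(DateTimeComparator.getInstance());
        System.out.println(instants); // oldest first
    }
}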
// Update the start date for next time. The parameter updateProgressTime is the date
// that retrieveHistory had gotten to when it completed or gave up.
// If lastSync is set and is earlier than updateProgressTime we will use that;
// otherwise we use updateProgressTime.
void updateStartDate(UpdateInfo updateInfo, ObjectType ot, DateTime updateProgressTime) {
    DateTimeComparator comparator = DateTimeComparator.getDateOnlyInstance();

    // Calculate the name of the key in the ApiAttributes table
    // where the next start of update for this object type is
    // stored and retrieve the stored value. This stored value
    // may potentially be null if something happened to the attributes table.
    String updateKeyName = "BodyMedia." + ot.getName() + ".updateStartDate";
    String storedUpdateStartDate = guestService.getApiKeyAttribute(updateInfo.apiKey, updateKeyName);

    // Retrieve the lastSync date if it has been added to the
    // updateInfo context by an extractor
    DateTime lastSync = (DateTime) updateInfo.getContext("lastSync");

    // Check which is earlier: the lastSync time returned from Bodymedia or the
    // point we were in the update that just ended. Store the lesser of the two
    // in nextUpdateStartDate.
    String nextUpdateStartDate = storedUpdateStartDate;
    if (lastSync != null) {
        if (comparator.compare(updateProgressTime, lastSync) > 0) {
            // lastSync from Bodymedia is less than the update progress
            nextUpdateStartDate = lastSync.toString(formatter);
        } else {
            // the update progress is less than lastSync from Bodymedia
            nextUpdateStartDate = updateProgressTime.toString(formatter);
        }
    } else {
        // lastSync is null; just leave the stored update time
        // alone since it's better to get some extra data next
        // time than to skip data from dates that potentially changed.
    }

    // Store the new value if it's different from what's stored in ApiKeyAttributes
    if (storedUpdateStartDate == null || !nextUpdateStartDate.equals(storedUpdateStartDate)) {
        guestService.setApiKeyAttribute(updateInfo.apiKey, updateKeyName, nextUpdateStartDate);
    }
}
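The method above relies on getDateOnlyInstance(), which compares only the calendar-date fields and treats any two instants on the same day as equal. A minimal illustration (the dates are made up):

import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;

public class DateOnlyCompareDemo {
    public static void main(String[] args) {
        // Illustrative sketch, not project code.
        DateTimeComparator dateOnly = DateTimeComparator.getDateOnlyInstance();
        DateTime morning = new DateTime(2013, 6, 1, 7, 0);
        DateTime evening = new DateTime(2013, 6, 1, 22, 0);
        System.out.println(dateOnly.compare(morning, evening));             // 0: same day
        System.out.println(dateOnly.compare(morning, evening.plusDays(1))); // negative: earlier day
    }
}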
/**
 * Retrieves the history for the given facet from the start date to the end date. It performs the
 * api calls in reverse order starting from the end date. This is so that the most recent
 * information is retrieved first.
 *
 * @param updateInfo The api's info
 * @param ot The ObjectType that represents the facet to be updated
 * @param start The earliest date for which the burn history is retrieved. This date is included
 *     in the update.
 * @param end The latest date for which the burn history is retrieved. This date is also included
 *     in the update.
 * @throws Exception If either storing the data fails or if the rate limit is reached on
 *     Bodymedia's api
 */
private void retrieveHistory(UpdateInfo updateInfo, ObjectType ot, DateTime start, DateTime end)
        throws Exception {
    final String urlExtension = url.get(ot);
    final int increment = maxIncrement.get(ot);
    DateTimeComparator comparator = DateTimeComparator.getDateOnlyInstance();
    DateTime current = start;
    // Set up the rate delay if we haven't already
    Long rateDelay = getRateDelay(updateInfo);
    try {
        // Loop from start to end, incrementing by the max number of days you can
        // specify for a given type of query. This is 1 for burn and sleep, and 31 for steps.
        // @ loop_invariant date.compareTo(userRegistrationDate) >= 0;
        while (comparator.compare(current, end) < 0) {
            if (guestService.getApiKey(updateInfo.apiKey.getId()) == null) {
                logger.info("Not updating BodyMedia connector instance with a deleted apiKeyId");
                return;
            }
            String startPeriod = current.toString(formatter);
            String endPeriod = current.plusDays(increment - 1).toString(formatter);
            String minutesUrl = "http://api.bodymedia.com/v2/json/" + urlExtension + startPeriod
                    + "/" + endPeriod + "?api_key="
                    + guestService.getApiKeyAttribute(updateInfo.apiKey, "bodymediaConsumerKey");
            // The following call may fail due to bodymedia's api. That is expected behavior.
            enforceRateLimits(rateDelay);
            String json = signpostHelper.makeRestCall(updateInfo.apiKey, ot.value(), minutesUrl);
            guestService.setApiKeyAttribute(
                    updateInfo.apiKey, "timeOfLastCall", String.valueOf(System.currentTimeMillis()));
            JSONObject bodymediaResponse = JSONObject.fromObject(json);
            JSONArray daysArray = bodymediaResponse.getJSONArray("days");
            if (bodymediaResponse.has("lastSync")) {
                DateTime d =
                        form.parseDateTime(bodymediaResponse.getJSONObject("lastSync").getString("dateTime"));

                // Get timezone map from UpdateInfo context
                TimezoneMap tzMap = (TimezoneMap) updateInfo.getContext("tzMap");

                // Insert lastSync into the updateInfo context so it's accessible to the updater
                updateInfo.setContext("lastSync", d);

                List<AbstractFacet> newFacets = new ArrayList<AbstractFacet>();
                for (Object o : daysArray) {
                    if (o instanceof JSONObject) {
                        if (ot == ObjectType.getObjectType(connector(), "steps"))
                            newFacets.add(createOrUpdateStepsFacet((JSONObject) o, updateInfo, d, tzMap));
                        else if (ot == ObjectType.getObjectType(connector(), "burn"))
                            newFacets.add(createOrUpdateBurnFacet((JSONObject) o, updateInfo, d, tzMap));
                        else
                            newFacets.add(createOrUpdateSleepFacet((JSONObject) o, updateInfo, d, tzMap));
                    }
                }
                bodyTrackStorageService.storeApiData(updateInfo.getGuestId(), newFacets);
            }
            current = current.plusDays(increment);
            // Update the stored value that controls when we will start updating next time
            updateStartDate(updateInfo, ot, current);
        }
    } catch (Exception e) {
        StringBuilder sb =
                new StringBuilder(
                        "module=updateQueue component=updater action=BodymediaUpdater.retrieveHistory")
                        .append(" message=\"exception while retrieving history\" connector=")
                        .append(updateInfo.apiKey.getConnector().toString())
                        .append(" guestId=")
                        .append(updateInfo.apiKey.getGuestId())
                        .append(" updatingDate=")
                        .append(current);
        logger.info(sb.toString());
        // Update the stored value that controls when we will start updating next time
        updateStartDate(updateInfo, ot, current);
        // Rethrow the error so that this task gets rescheduled
        throw e;
    }
}
public boolean run() {
  try {
    /*
     * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
     * in the final segment.
     */
    if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
      throw new ISE(
          "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
          config.getPartitionsSpec());
    }

    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      final Job groupByJob =
          Job.getInstance(
              new Configuration(),
              String.format(
                  "%s-determine_partitions_groupby-%s",
                  config.getDataSource(), config.getIntervals()));

      JobHelper.injectSystemProperties(groupByJob);
      config.addJobProperties(groupByJob);

      groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
      groupByJob.setMapOutputKeyClass(BytesWritable.class);
      groupByJob.setMapOutputValueClass(NullWritable.class);
      groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setOutputKeyClass(BytesWritable.class);
      groupByJob.setOutputValueClass(NullWritable.class);
      groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
      JobHelper.setupClasspath(
          JobHelper.distributedClassPath(config.getWorkingPath()),
          JobHelper.distributedClassPath(config.makeIntermediatePath()),
          groupByJob);

      config.addInputPaths(groupByJob);
      config.intoConfiguration(groupByJob);
      FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

      groupByJob.submit();
      log.info(
          "Job %s submitted, status available at: %s",
          groupByJob.getJobName(), groupByJob.getTrackingURL());

      if (!groupByJob.waitForCompletion(true)) {
        log.error("Job failed: %s", groupByJob.getJobID());
        return false;
      }
    } else {
      log.info("Skipping group-by job.");
    }

    /*
     * Read grouped data and determine appropriate partitions.
     */
    final Job dimSelectionJob =
        Job.getInstance(
            new Configuration(),
            String.format(
                "%s-determine_partitions_dimselection-%s",
                config.getDataSource(), config.getIntervals()));

    dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

    JobHelper.injectSystemProperties(dimSelectionJob);
    config.addJobProperties(dimSelectionJob);

    if (!config.getPartitionsSpec().isAssumeGrouped()) {
      // Read grouped data from the groupByJob.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
      dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
      FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
    } else {
      // Directly read the source data, since we assume it's already grouped.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
      config.addInputPaths(dimSelectionJob);
    }

    SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
    dimSelectionJob.setMapOutputValueClass(Text.class);
    dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
    dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
    dimSelectionJob.setOutputKeyClass(BytesWritable.class);
    dimSelectionJob.setOutputValueClass(Text.class);
    dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
    dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
    dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(config.getWorkingPath()),
        JobHelper.distributedClassPath(config.makeIntermediatePath()),
        dimSelectionJob);

    config.intoConfiguration(dimSelectionJob);
    FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

    dimSelectionJob.submit();
    log.info(
        "Job %s submitted, status available at: %s",
        dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

    if (!dimSelectionJob.waitForCompletion(true)) {
      log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
      return false;
    }

    /*
     * Load partitions determined by the previous job.
     */
    log.info(
        "Job completed, loading up partitions for intervals[%s].",
        config.getSegmentGranularIntervals());
    FileSystem fileSystem = null;
    Map<DateTime, List<HadoopyShardSpec>> shardSpecs =
        Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
      final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
      if (fileSystem == null) {
        fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
      }
      if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
        List<ShardSpec> specs =
            config.JSON_MAPPER.readValue(
                Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                new TypeReference<List<ShardSpec>>() {});

        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (int i = 0; i < specs.size(); ++i) {
          actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
          log.info(
              "DateTime[%s], partition[%d], spec[%s]",
              segmentGranularity, i, actualSpecs.get(i));
        }

        shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
      } else {
        log.info("Path[%s] didn't exist!?", partitionInfoPath);
      }
    }
    config.setShardSpecs(shardSpecs);

    return true;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
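One detail worth calling out in the snippet above is Maps.newTreeMap(DateTimeComparator.getInstance()), which builds a TreeMap whose DateTime keys are iterated in ascending instant order. A stripped-down sketch of the same idea, with hypothetical map contents:

import java.util.Map;
import java.util.TreeMap;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;

public class ShardSpecOrderingDemo {
    public static void main(String[] args) {
        // Illustrative sketch: equivalent to Guava's Maps.newTreeMap(comparator),
        // keys are kept sorted by the supplied DateTimeComparator.
        Map<DateTime, String> byStartTime =
                new TreeMap<DateTime, String>(DateTimeComparator.getInstance());
        byStartTime.put(new DateTime(2014, 2, 1, 0, 0), "interval-2");
        byStartTime.put(new DateTime(2014, 1, 1, 0, 0), "interval-1");
        System.out.println(byStartTime.keySet()); // ascending by start instant
    }
}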
@Override
public int compareTo(TaskRunnerWorkItem taskRunnerWorkItem) {
    return DateTimeComparator.getInstance()
            .compare(createdTime, taskRunnerWorkItem.getCreatedTime());
}
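Because this compareTo delegates to DateTimeComparator, collections of such work items sort oldest-first by creation time. A hypothetical stand-in class (not the real TaskRunnerWorkItem) makes the pattern concrete:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.DateTimeComparator;

// Illustrative stand-in, not project code.
class WorkItem implements Comparable<WorkItem> {
    private final DateTime createdTime;

    WorkItem(DateTime createdTime) {
        this.createdTime = createdTime;
    }

    DateTime getCreatedTime() {
        return createdTime;
    }

    // Same delegation as above: order by creation instant.
    @Override
    public int compareTo(WorkItem other) {
        return DateTimeComparator.getInstance().compare(createdTime, other.getCreatedTime());
    }

    public static void main(String[] args) {
        List<WorkItem> items = Arrays.asList(
                new WorkItem(new DateTime(2014, 5, 2, 9, 0)),
                new WorkItem(new DateTime(2014, 5, 1, 9, 0)));
        Collections.sort(items); // oldest createdTime first
    }
}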