Example #1
  /** A comparator to sort pages ascending by date. */
  public static final class SortByDateAscending implements Comparator<Page> {

    /** Delegate for Joda time objects. */
    private final Comparator<Object> jodaCompare = DateTimeComparator.getInstance();

    @Override
    public int compare(final Page o1, final Page o2) {
      return jodaCompare.compare(o1.getPublishingDate(), o2.getPublishingDate());
    }
  }
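A minimal usage sketch for the comparator above, assuming the same Page type from the example; fetchPages() is a hypothetical helper, not part of the original code:

  // Hypothetical usage: sort pages so the oldest publishing date comes first.
  List<Page> pages = fetchPages();                      // fetchPages() is a made-up source of pages
  Collections.sort(pages, new SortByDateAscending());   // ascending by publishing date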
Example #2
  // Update the start date for next time.  The updateProgressTime parameter is the date
  // that retrieveHistory had gotten to when it completed or gave up.
  // If lastSync is set and is earlier than updateProgressTime we will use lastSync,
  // and otherwise use updateProgressTime.
  void updateStartDate(UpdateInfo updateInfo, ObjectType ot, DateTime updateProgressTime) {
    DateTimeComparator comparator = DateTimeComparator.getDateOnlyInstance();

    // Calculate the name of the key in the ApiAttributes table
    // where the next start of update for this object type is
    // stored and retrieve the stored value.  This stored value
    // may potentially be null if something happened to the attributes table
    String updateKeyName = "BodyMedia." + ot.getName() + ".updateStartDate";
    String storedUpdateStartDate =
        guestService.getApiKeyAttribute(updateInfo.apiKey, updateKeyName);

    // Retrieve the lastSync date if it has been added to the
    // updateInfo context by an extractor
    DateTime lastSync = (DateTime) updateInfo.getContext("lastSync");

    // Check which is earlier: the lastSync time returned from Bodymedia or the
    // point we were in the update that just ended.  Store the earlier of the two
    // in nextUpdateStartDate
    String nextUpdateStartDate = storedUpdateStartDate;
    if (lastSync != null) {
      if (comparator.compare(updateProgressTime, lastSync) > 0) {
        // lastSync from Bodymedia is earlier than the update progress
        nextUpdateStartDate = lastSync.toString(formatter);
      } else {
        // the update progress is earlier than (or on the same day as) lastSync from Bodymedia
        nextUpdateStartDate = updateProgressTime.toString(formatter);
      }
    } else {
      // Last sync is null, so just leave the stored update start date
      // alone since it's better to get some extra data next
      // time than to skip data from dates that potentially changed
    }

    // Store the new value if it's different from what's stored in ApiKeyAttributes
    if (storedUpdateStartDate == null || !nextUpdateStartDate.equals(storedUpdateStartDate)) {
      guestService.setApiKeyAttribute(updateInfo.apiKey, updateKeyName, nextUpdateStartDate);
    }
  }
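The logic above depends on DateTimeComparator.getDateOnlyInstance() ignoring the time-of-day fields. A minimal sketch of that behavior (the dates are made up for illustration):

  // getDateOnlyInstance() compares only the date fields, so two instants that fall
  // on the same calendar day compare as equal regardless of time of day.
  DateTimeComparator dateOnly = DateTimeComparator.getDateOnlyInstance();
  DateTime morning = new DateTime(2013, 5, 1, 8, 0);             // made-up dates
  DateTime evening = new DateTime(2013, 5, 1, 22, 30);
  int sameDay = dateOnly.compare(morning, evening);              // 0: same calendar day
  int nextDay = dateOnly.compare(morning, evening.plusDays(1));  // negative: earlier day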
Example #3
  /**
   * Retrieves the history for the given facet from the start date to the end date. It performs the
   * api calls in reverse order starting from the end date. This is so that the most recent
   * information is retrieved first.
   *
   * @param updateInfo The api's info
   * @param ot The ObjectType that represents the facet to be updated
   * @param start The earliest date for which the burn history is retrieved. This date is included
   *     in the update.
   * @param end The latest date for which the burn history is retrieved. This date is also included
   *     in the update.
   * @throws Exception If either storing the data fails or if the rate limit is reached on
   *     Bodymedia's api
   */
  private void retrieveHistory(UpdateInfo updateInfo, ObjectType ot, DateTime start, DateTime end)
      throws Exception {
    final String urlExtension = url.get(ot);
    final int increment = maxIncrement.get(ot);
    DateTimeComparator comparator = DateTimeComparator.getDateOnlyInstance();
    DateTime current = start;

    // Setup the rate delay if we haven't already
    Long rateDelay = getRateDelay(updateInfo);

    try {
      //  Loop from start to end, incrementing by the max number of days you can
      //  specify for a given type of query.  This is 1 for burn and sleep, and 31 for steps.
      // @ loop_invariant date.compareTo(userRegistrationDate) >= 0;
      while (comparator.compare(current, end) < 0) {
        if (guestService.getApiKey(updateInfo.apiKey.getId()) == null) {
          logger.info("Not updating BodyMedia connector instance with a deleted apiKeyId");
          return;
        }
        String startPeriod = current.toString(formatter);
        String endPeriod = current.plusDays(increment - 1).toString(formatter);
        String minutesUrl =
            "http://api.bodymedia.com/v2/json/"
                + urlExtension
                + startPeriod
                + "/"
                + endPeriod
                + "?api_key="
                + guestService.getApiKeyAttribute(updateInfo.apiKey, "bodymediaConsumerKey");
        // The following call may fail due to bodymedia's api. That is expected behavior
        enforceRateLimits(rateDelay);
        String json = signpostHelper.makeRestCall(updateInfo.apiKey, ot.value(), minutesUrl);
        guestService.setApiKeyAttribute(
            updateInfo.apiKey, "timeOfLastCall", String.valueOf(System.currentTimeMillis()));
        JSONObject bodymediaResponse = JSONObject.fromObject(json);
        JSONArray daysArray = bodymediaResponse.getJSONArray("days");
        if (bodymediaResponse.has("lastSync")) {
          DateTime d =
              form.parseDateTime(bodymediaResponse.getJSONObject("lastSync").getString("dateTime"));

          // Get timezone map from UpdateInfo context
          TimezoneMap tzMap = (TimezoneMap) updateInfo.getContext("tzMap");

          // Insert lastSync into the updateInfo context so it's accessible to the updater
          updateInfo.setContext("lastSync", d);
          List<AbstractFacet> newFacets = new ArrayList<AbstractFacet>();
          for (Object o : daysArray) {
            if (o instanceof JSONObject) {
              if (ot == ObjectType.getObjectType(connector(), "steps"))
                newFacets.add(createOrUpdateStepsFacet((JSONObject) o, updateInfo, d, tzMap));
              else if (ot == ObjectType.getObjectType(connector(), "burn"))
                newFacets.add(createOrUpdateBurnFacet((JSONObject) o, updateInfo, d, tzMap));
              else newFacets.add(createOrUpdateSleepFacet((JSONObject) o, updateInfo, d, tzMap));
            }
          }
          bodyTrackStorageService.storeApiData(updateInfo.getGuestId(), newFacets);
        }

        current = current.plusDays(increment);

        // Update the stored value that controls when we will start updating next time
        updateStartDate(updateInfo, ot, current);
      }

    } catch (Exception e) {
      StringBuilder sb =
          new StringBuilder(
                  "module=updateQueue component=updater action=BodymediaUpdater.retrieveHistory")
              .append(" message=\"exception while retrieving history\" connector=")
              .append(updateInfo.apiKey.getConnector().toString())
              .append(" guestId=")
              .append(updateInfo.apiKey.getGuestId())
              .append(" updatingDate=")
              .append(current);
      logger.info(sb.toString());

      // Update the stored value that controls when we will start updating next time
      updateStartDate(updateInfo, ot, current);

      // Rethrow the error so that this task gets rescheduled
      throw e;
    }
  }
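The loop in retrieveHistory walks the date range in fixed-size windows, using a date-only comparison so that partial days at the boundary do not trigger an extra iteration. A stripped-down sketch of just that skeleton, assuming start and end are DateTime values; the 7-day increment and processWindow(...) are illustrative placeholders:

  // Windowed iteration from start toward end in date-only steps.
  DateTimeComparator dateOnly = DateTimeComparator.getDateOnlyInstance();
  int increment = 7;                                 // hypothetical window size in days
  DateTime current = start;
  while (dateOnly.compare(current, end) < 0) {
    DateTime windowEnd = current.plusDays(increment - 1);
    processWindow(current, windowEnd);               // hypothetical per-window work
    current = current.plusDays(increment);
  }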
Example #4
  public boolean run() {
    try {
      /*
       * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
       * in the final segment.
       */

      if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
        throw new ISE(
            "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
            config.getPartitionsSpec());
      }

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        final Job groupByJob =
            Job.getInstance(
                new Configuration(),
                String.format(
                    "%s-determine_partitions_groupby-%s",
                    config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);

        groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
        groupByJob.setMapOutputKeyClass(BytesWritable.class);
        groupByJob.setMapOutputValueClass(NullWritable.class);
        groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setOutputKeyClass(BytesWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        JobHelper.setupClasspath(
            JobHelper.distributedClassPath(config.getWorkingPath()),
            JobHelper.distributedClassPath(config.makeIntermediatePath()),
            groupByJob);

        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info(
            "Job %s submitted, status available at: %s",
            groupByJob.getJobName(), groupByJob.getTrackingURL());

        if (!groupByJob.waitForCompletion(true)) {
          log.error("Job failed: %s", groupByJob.getJobID());
          return false;
        }
      } else {
        log.info("Skipping group-by job.");
      }

      /*
       * Read grouped data and determine appropriate partitions.
       */
      final Job dimSelectionJob =
          Job.getInstance(
              new Configuration(),
              String.format(
                  "%s-determine_partitions_dimselection-%s",
                  config.getDataSource(), config.getIntervals()));

      dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

      JobHelper.injectSystemProperties(dimSelectionJob);
      config.addJobProperties(dimSelectionJob);

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        // Read grouped data from the groupByJob.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
        dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
      } else {
        // Directly read the source data, since we assume it's already grouped.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
        config.addInputPaths(dimSelectionJob);
      }

      SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
      dimSelectionJob.setMapOutputValueClass(Text.class);
      dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
      dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
      dimSelectionJob.setOutputKeyClass(BytesWritable.class);
      dimSelectionJob.setOutputValueClass(Text.class);
      dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
      dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
      dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
      JobHelper.setupClasspath(
          JobHelper.distributedClassPath(config.getWorkingPath()),
          JobHelper.distributedClassPath(config.makeIntermediatePath()),
          dimSelectionJob);

      config.intoConfiguration(dimSelectionJob);
      FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

      dimSelectionJob.submit();
      log.info(
          "Job %s submitted, status available at: %s",
          dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

      if (!dimSelectionJob.waitForCompletion(true)) {
        log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
        return false;
      }

      /*
       * Load partitions determined by the previous job.
       */

      log.info(
          "Job completed, loading up partitions for intervals[%s].",
          config.getSegmentGranularIntervals());
      FileSystem fileSystem = null;
      Map<DateTime, List<HadoopyShardSpec>> shardSpecs =
          Maps.newTreeMap(DateTimeComparator.getInstance());
      int shardCount = 0;
      for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
        if (fileSystem == null) {
          fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
        }
        if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
          List<ShardSpec> specs =
              config.JSON_MAPPER.readValue(
                  Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                  new TypeReference<List<ShardSpec>>() {});

          List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
          for (int i = 0; i < specs.size(); ++i) {
            actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
            log.info(
                "DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
          }

          shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
        } else {
          log.info("Path[%s] didn't exist!?", partitionInfoPath);
        }
      }
      config.setShardSpecs(shardSpecs);

      return true;
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }
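One DateTimeComparator detail in the job above: Maps.newTreeMap(DateTimeComparator.getInstance()) yields a TreeMap whose keys are ordered by instant, so shardSpecs stays sorted by interval start time. A minimal sketch of that idea in isolation (the keys and values here are placeholders):

  // TreeMap ordered by the DateTime keys, oldest start time first.
  Map<DateTime, String> byStart = Maps.newTreeMap(DateTimeComparator.getInstance());
  byStart.put(new DateTime(2014, 1, 2, 0, 0), "second interval");  // placeholder entries
  byStart.put(new DateTime(2014, 1, 1, 0, 0), "first interval");
  // Iterating byStart.keySet() yields 2014-01-01 before 2014-01-02.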
Example #5
  @Override
  public int compareTo(TaskRunnerWorkItem taskRunnerWorkItem) {
    return DateTimeComparator.getInstance()
        .compare(createdTime, taskRunnerWorkItem.getCreatedTime());
  }
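Because compareTo above delegates to DateTimeComparator, sorting a collection of work items orders them by creation time. A minimal sketch, assuming TaskRunnerWorkItem implements Comparable<TaskRunnerWorkItem> as the override suggests and that pendingItems is a hypothetical List<TaskRunnerWorkItem>; note that Joda-Time's DateTimeComparator treats a null argument as meaning "now", so an item with a null createdTime would sort as if it had just been created:

  // Oldest work items first; relies on the compareTo implementation above.
  Collections.sort(pendingItems);                   // pendingItems is a hypothetical list
  TaskRunnerWorkItem oldest = pendingItems.get(0);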