Example #1
 public static Path makeSegmentOutputPath(
     Path basePath,
     FileSystem fileSystem,
     String dataSource,
     String version,
     Interval interval,
     int partitionNum) {
   Path outputPath = new Path(prependFSIfNullScheme(fileSystem, basePath), "./" + dataSource);
   if ("hdfs".equals(fileSystem.getScheme())) {
     outputPath =
         new Path(
             outputPath,
             String.format(
                 "./%s_%s",
                 interval.getStart().toString(ISODateTimeFormat.basicDateTime()),
                 interval.getEnd().toString(ISODateTimeFormat.basicDateTime())));
     outputPath = new Path(outputPath, version.replace(":", "_"));
   } else {
     outputPath =
         new Path(
             outputPath,
             String.format(
                 "./%s_%s", interval.getStart().toString(), interval.getEnd().toString()));
     outputPath = new Path(outputPath, String.format("./%s", version));
   }
   outputPath = new Path(outputPath, Integer.toString(partitionNum));
   return outputPath;
 }
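A minimal Joda-Time sketch (not from the original source) of why the HDFS branch above formats the interval bounds with ISODateTimeFormat.basicDateTime(): the basic form contains no colons, which are problematic in Hadoop path segments, while the default toString() does.

 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.format.ISODateTimeFormat;

 DateTime start = new DateTime(2014, 1, 1, 0, 0, DateTimeZone.UTC);
 // Default ISO-8601 form contains ':' -> "2014-01-01T00:00:00.000Z"
 String defaultForm = start.toString();
 // Basic ISO-8601 form has no ':' -> "20140101T000000.000Z"
 String pathSafeForm = start.toString(ISODateTimeFormat.basicDateTime());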
 @Override
 public void serialize(
     final Interval value, final JsonGenerator gen, final SerializerProvider provider)
     throws IOException {
   gen.writeStartObject();
   gen.writeStringField("startdatetime", formatter.print(value.getStart()));
   gen.writeStringField("starttimezone", value.getStart().getZone().toString());
   gen.writeStringField("enddatetime", formatter.print(value.getEnd()));
   gen.writeStringField("endtimezone", value.getEnd().getZone().toString());
   gen.writeEndObject();
 }
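A hedged sketch of how a serializer like the one above would typically be registered with Jackson; "IntervalFieldsSerializer" is a hypothetical stand-in for the enclosing serializer class, whose real name is not shown.

 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.module.SimpleModule;
 import org.joda.time.Interval;

 public static String serializeInterval() throws Exception {
   ObjectMapper mapper = new ObjectMapper();
   SimpleModule module = new SimpleModule("joda-interval");
   module.addSerializer(Interval.class, new IntervalFieldsSerializer());
   mapper.registerModule(module);
   // Produces {"startdatetime": ..., "starttimezone": ..., "enddatetime": ..., "endtimezone": ...}
   return mapper.writeValueAsString(new Interval(0L, 60000L));
 }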
 public int getDaysInInterval(Interval interval) {
   LocalDate beginDate =
       getBeginDate().isBefore(interval.getStart().toLocalDate())
           ? interval.getStart().toLocalDate()
           : getBeginDate();
   LocalDate endDate =
       getEndDate() == null || getEndDate().isAfter(interval.getEnd().toLocalDate())
           ? interval.getEnd().toLocalDate()
           : getEndDate();
   return Days.daysBetween(beginDate, endDate).getDays();
 }
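A small sketch (not from the original source) of the Days.daysBetween behaviour relied on above: the count is end-exclusive, and getDaysInInterval() first clamps the interval into the [getBeginDate(), getEndDate()] range before counting.

 import org.joda.time.Days;
 import org.joda.time.LocalDate;

 LocalDate begin = new LocalDate(2020, 1, 1);
 LocalDate end = new LocalDate(2020, 1, 10);
 // Whole days between the two dates, end-exclusive: 9
 int days = Days.daysBetween(begin, end).getDays();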
Example #4
  // TODO report status effectively
  @Override
  protected void runTask() {
    try {
      LocalDate today = LocalDate.now(DateTimeZone.UTC);
      LocalDate start = today.minusDays(minusDays);
      LocalDate finish = today.plusDays(plusDays);

      List<Channel> youViewChannels = channelResolver.getAllChannels();

      UpdateProgress progress = UpdateProgress.START;

      while (!start.isAfter(finish)) {
        LocalDate end = start.plusDays(1);
        for (Channel channel : youViewChannels) {
          Interval interval =
              new Interval(start.toDateTimeAtStartOfDay(), end.toDateTimeAtStartOfDay());
          Document xml =
              fetcher.getSchedule(interval.getStart(), interval.getEnd(), getYouViewId(channel));
          Element root = xml.getRootElement();
          Elements entries = root.getChildElements(ENTRY_KEY, root.getNamespaceURI(ATOM_PREFIX));

          progress = progress.reduce(processor.process(channel, entries, interval));
          reportStatus(progress.toString());
        }
        start = end;
      }
    } catch (Exception e) {
      log.error("Exception when processing YouView schedule", e);
      Throwables.propagate(e);
    }
  }
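A standalone sketch (Joda-Time only, dates are illustrative) of the day-stepping pattern used by runTask(): each LocalDate is expanded into a whole-day Interval running from its start of day to the next day's start of day.

 import org.joda.time.Interval;
 import org.joda.time.LocalDate;

 LocalDate start = new LocalDate(2020, 1, 1);
 LocalDate finish = new LocalDate(2020, 1, 3);
 while (!start.isAfter(finish)) {
   LocalDate end = start.plusDays(1);
   // One whole day, [start 00:00, next day 00:00)
   Interval day = new Interval(start.toDateTimeAtStartOfDay(), end.toDateTimeAtStartOfDay());
   start = end;
 }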
  private void addField(String name, Object value) {
    if (value == null) {
      return;
    }

    if (value instanceof LocalDate) { // TODO: is this correct? Shouldn't we use LocalTime here?
      DateTime time = ((LocalDate) value).toDateTimeAtStartOfDay();
      addDateTimeInUTC(name, time);
    } else if (value instanceof Interval) {
      Interval interval = (Interval) value;
      DateTime start = interval.getStart();
      DateTime end = interval.getEnd();
      addDateTimeInUTC(name + "_start", start);
      addDateTimeInUTC(name + "_end", end);
    } else if (value instanceof ValueObject) {
      ValueObject object = (ValueObject) value;
      inputDocument.addField(name, object.getIndexingValue());
    } else if (value instanceof AbstractEntity) {
      AbstractEntity object = (AbstractEntity) value;
      inputDocument.addField(name, object.getId());
    } else if (value instanceof Collection) {
      Collection<?> list = (Collection<?>) value;
      if (!list.isEmpty()) {
        for (Object object : list) {
          addField(name, object);
        }
      }
    } else {
      inputDocument.addField(name, value.toString());
    }
  }
  @Override
  public Iterable<Cursor> makeCursors(Filter filter, Interval interval, QueryGranularity gran) {
    Interval actualInterval = interval;
    if (!actualInterval.overlaps(index.dataInterval)) {
      return ImmutableList.of();
    }

    if (actualInterval.getStart().isBefore(index.dataInterval.getStart())) {
      actualInterval = actualInterval.withStart(index.dataInterval.getStart());
    }
    if (actualInterval.getEnd().isAfter(index.dataInterval.getEnd())) {
      actualInterval = actualInterval.withEnd(index.dataInterval.getEnd());
    }

    final Iterable<Cursor> iterable;
    if (filter == null) {
      iterable = new NoFilterCursorIterable(index, actualInterval, gran);
    } else {
      Offset offset = new ConciseOffset(filter.goConcise(new MMappedInvertedIndexSelector(index)));

      iterable = new CursorIterable(index, actualInterval, gran, offset);
    }

    return FunctionalIterable.create(iterable).keep(Functions.<Cursor>identity());
  }
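A pure-Joda sketch (illustrative dates, not from the original source) of the clamping pattern above: skip the query if it does not overlap the data interval, otherwise trim it with withStart()/withEnd().

 import org.joda.time.DateTime;
 import org.joda.time.Interval;

 Interval data = new Interval(DateTime.parse("2020-01-01T00:00Z"), DateTime.parse("2020-02-01T00:00Z"));
 Interval query = new Interval(DateTime.parse("2019-12-15T00:00Z"), DateTime.parse("2020-01-10T00:00Z"));
 if (query.overlaps(data)) {
   Interval clamped = query;
   if (clamped.getStart().isBefore(data.getStart())) {
     clamped = clamped.withStart(data.getStart());
   }
   if (clamped.getEnd().isAfter(data.getEnd())) {
     clamped = clamped.withEnd(data.getEnd());
   }
   // clamped is now 2020-01-01T00:00Z/2020-01-10T00:00Z
 }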
  @Override
  public AggregateCount getCounts(String name, Interval interval, DateTimeField resolution) {

    DateTime end = interval.getEnd();
    Chronology c = interval.getChronology();
    DurationField resolutionDuration = resolution.getDurationField();

    long[] counts;

    if (resolutionDuration.getUnitMillis() == DateTimeConstants.MILLIS_PER_MINUTE) {
      // Iterate through each hour in the interval and load the minutes for it
      MutableDateTime dt = new MutableDateTime(interval.getStart());
      dt.setRounding(c.hourOfDay());
      Duration step = Duration.standardHours(1);
      List<long[]> hours = new ArrayList<long[]>();
      while (dt.isBefore(end)) {
        hours.add(getMinCountsForHour(name, dt));
        dt.add(step);
      }
      counts =
          MetricUtils.concatArrays(
              hours,
              interval.getStart().getMinuteOfHour(),
              interval.toPeriod().toStandardMinutes().getMinutes() + 1,
              60);

    } else if (resolutionDuration.getUnitMillis() == DateTimeConstants.MILLIS_PER_HOUR) {
      DateTime cursor = new DateTime(c.dayOfMonth().roundFloor(interval.getStart().getMillis()));
      List<long[]> days = new ArrayList<long[]>();
      Duration step = Duration.standardHours(24);
      while (cursor.isBefore(end)) {
        days.add(getHourCountsForDay(name, cursor));
        cursor = cursor.plus(step);
      }

      counts =
          MetricUtils.concatArrays(
              days,
              interval.getStart().getHourOfDay(),
              interval.toPeriod().toStandardHours().getHours() + 1,
              24);

    } else {
      throw new IllegalArgumentException("Only minute or hour resolution is currently supported");
    }
    return new AggregateCount(name, interval, counts, resolution);
  }
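A brief sketch (Joda-Time only, valid for intervals shorter than a month since toStandardMinutes() rejects month or year fields) of how the minute-resolution window is sized above: toStandardMinutes() gives the span in whole minutes, and getCounts() adds one so the final minute slot is included.

 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;
 import org.joda.time.Interval;

 DateTime start = new DateTime(2020, 1, 1, 10, 15, DateTimeZone.UTC);
 Interval interval = new Interval(start, start.plusMinutes(90));
 int offsetIntoFirstHour = interval.getStart().getMinuteOfHour();              // 15
 int windowMinutes = interval.toPeriod().toStandardMinutes().getMinutes() + 1; // 91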
  private List<TimelineObjectHolder<VersionType, ObjectType>> lookup(
      Interval interval, boolean incompleteOk) {
    List<TimelineObjectHolder<VersionType, ObjectType>> retVal =
        new ArrayList<TimelineObjectHolder<VersionType, ObjectType>>();
    NavigableMap<Interval, TimelineEntry> timeline =
        (incompleteOk) ? incompletePartitionsTimeline : completePartitionsTimeline;

    for (Map.Entry<Interval, TimelineEntry> entry : timeline.entrySet()) {
      Interval timelineInterval = entry.getKey();
      TimelineEntry val = entry.getValue();

      if (timelineInterval.overlaps(interval)) {
        retVal.add(
            new TimelineObjectHolder<VersionType, ObjectType>(
                timelineInterval, val.getVersion(), val.getPartitionHolder()));
      }
    }

    if (retVal.isEmpty()) {
      return retVal;
    }

    TimelineObjectHolder<VersionType, ObjectType> firstEntry = retVal.get(0);
    if (interval.overlaps(firstEntry.getInterval())
        && interval.getStart().isAfter(firstEntry.getInterval().getStart())) {
      retVal.set(
          0,
          new TimelineObjectHolder<VersionType, ObjectType>(
              new Interval(interval.getStart(), firstEntry.getInterval().getEnd()),
              firstEntry.getVersion(),
              firstEntry.getObject()));
    }

    TimelineObjectHolder<VersionType, ObjectType> lastEntry = retVal.get(retVal.size() - 1);
    if (interval.overlaps(lastEntry.getInterval())
        && interval.getEnd().isBefore(lastEntry.getInterval().getEnd())) {
      retVal.set(
          retVal.size() - 1,
          new TimelineObjectHolder<VersionType, ObjectType>(
              new Interval(lastEntry.getInterval().getStart(), interval.getEnd()),
              lastEntry.getVersion(),
              lastEntry.getObject()));
    }

    return retVal;
  }
 public static CharSequence format(Interval interval) {
   StringBuilder sb = new StringBuilder();
   try {
     HOUR_MINUTE_FORMAT.printTo(sb, interval.getStart());
     sb.append(" - ");
     HOUR_MINUTE_FORMAT.printTo(sb, interval.getEnd());
   } catch (IOException e) {
     // Will never happen for StringBuilder
   }
   return sb;
 }
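HOUR_MINUTE_FORMAT is not shown above; a plausible definition and the resulting output, as a hedged sketch:

 import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;

 // Hypothetical stand-in for the HOUR_MINUTE_FORMAT constant used by format().
 static final DateTimeFormatter HOUR_MINUTE_FORMAT = DateTimeFormat.forPattern("HH:mm").withZoneUTC();

 // format(new Interval(DateTime.parse("2020-01-01T09:30Z"), DateTime.parse("2020-01-01T17:45Z")))
 // would then yield "09:30 - 17:45".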
    public DeterminePartitionsDimSelectionMapperHelper(
        HadoopDruidIndexerConfig config, String partitionDimension) {
      this.config = config;
      this.partitionDimension = partitionDimension;

      final ImmutableMap.Builder<DateTime, Integer> timeIndexBuilder = ImmutableMap.builder();
      int idx = 0;
      for (final Interval bucketInterval : config.getGranularitySpec().bucketIntervals().get()) {
        timeIndexBuilder.put(bucketInterval.getStart(), idx);
        idx++;
      }

      this.intervalIndexes = timeIndexBuilder.build();
    }
Example #11
  @Override
  public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    final Set<Interval> intervals = Sets.newTreeSet(Comparators.intervals());
    Optional<Set<Interval>> optionalIntervals = config.getSegmentGranularIntervals();
    if (optionalIntervals.isPresent()) {
      for (Interval segmentInterval : optionalIntervals.get()) {
        for (Interval dataInterval : dataGranularity.getIterable(segmentInterval)) {
          intervals.add(dataInterval);
        }
      }
    }

    Path betaInput = new Path(inputPath);
    FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
    Set<String> paths = Sets.newTreeSet();
    Pattern fileMatcher = Pattern.compile(filePattern);

    DateTimeFormatter customFormatter = null;
    if (pathFormat != null) {
      customFormatter = DateTimeFormat.forPattern(pathFormat);
    }

    for (Interval interval : intervals) {
      DateTime t = interval.getStart();
      String intervalPath = null;
      if (customFormatter != null) {
        intervalPath = customFormatter.print(t);
      } else {
        intervalPath = dataGranularity.toPath(t);
      }

      Path granularPath = new Path(betaInput, intervalPath);
      log.info("Checking path[%s]", granularPath);
      for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
        final Path filePath = status.getPath();
        if (fileMatcher.matcher(filePath.toString()).matches()) {
          paths.add(filePath.toString());
        }
      }
    }

    for (String path : paths) {
      log.info("Appending path[%s]", path);
      FileInputFormat.addInputPath(job, new Path(path));
    }

    return job;
  }
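A small sketch of the customFormatter branch above, which turns each interval's start time into a date-partitioned sub-path; the pattern string here is hypothetical, since the real pathFormat comes from configuration.

 import org.joda.time.DateTime;
 import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;

 DateTimeFormatter customFormatter = DateTimeFormat.forPattern("yyyy/MM/dd").withZoneUTC();
 String intervalPath = customFormatter.print(DateTime.parse("2020-01-05T00:00Z"));
 // intervalPath is "2020/01/05", joined under the base input path as new Path(betaInput, intervalPath)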
 @Override
 public RunTimeElements getRunTimeElements(Interval timeRange) {
   RunTimeElements result = new RunTimeElements(timeRange);
   RunTimeElements work = getNextSingleStarts(timeRange.getStart());
   for (RunTimeElement runtime : work.values()) {
     DateTime date = runtime.getStartDate();
     if (timeRange.contains(date)) {
       while (timeRange.contains(date)) {
         result.add(new RunTimeElement(date, runtime.getWhenHoliday()));
         date = date.plusWeeks(1);
       }
     }
   }
    // Collections.sort(result, DateTimeComparator.getInstance());
   return result;
 }
    public void emitDimValueCounts(
        TaskInputOutputContext<?, ?, BytesWritable, Text> context,
        DateTime timestamp,
        Map<String, Iterable<String>> dims)
        throws IOException, InterruptedException {
      final Optional<Interval> maybeInterval =
          config.getGranularitySpec().bucketInterval(timestamp);

      if (!maybeInterval.isPresent()) {
        throw new ISE("WTF?! No bucket found for timestamp: %s", timestamp);
      }

      final Interval interval = maybeInterval.get();
      final int intervalIndex = intervalIndexes.get(interval.getStart());

      final ByteBuffer buf = ByteBuffer.allocate(4 + 8);
      buf.putInt(intervalIndex);
      buf.putLong(interval.getStartMillis());
      final byte[] groupKey = buf.array();

      // Emit row-counter value.
      write(context, groupKey, new DimValueCount("", "", 1));

      for (final Map.Entry<String, Iterable<String>> dimAndValues : dims.entrySet()) {
        final String dim = dimAndValues.getKey();

        if (partitionDimension == null || partitionDimension.equals(dim)) {
          final Iterable<String> dimValues = dimAndValues.getValue();

          if (Iterables.size(dimValues) == 1) {
            // Emit this value.
            write(
                context, groupKey, new DimValueCount(dim, Iterables.getOnlyElement(dimValues), 1));
          } else {
            // This dimension is unsuitable for partitioning. Poison it by emitting a negative
            // value.
            write(context, groupKey, new DimValueCount(dim, "", -1));
          }
        }
      }
    }
 public ActionForward sessionPostback(
     ActionMapping mapping,
     ActionForm actionForm,
     HttpServletRequest request,
     HttpServletResponse response) {
   CandidateSearchBean search = getRenderedObject("search");
   RenderUtils.invalidateViewState();
   Interval interval = search.getSession().getCandidacyInterval();
   if (search.getCutStart() == null
       || !interval.contains(search.getCutStart().toDateTimeAtStartOfDay())) {
     search.setCutStart(interval.getStart().toLocalDate());
   }
   if (search.getCutEnd() == null
       || !interval.contains(search.getCutEnd().toDateTimeAtStartOfDay())) {
     search.setCutEnd(interval.getEnd().toLocalDate());
   }
   if (interval.contains(new LocalDate().minusDays(1).toDateMidnight())) {
     search.setCutEnd(new LocalDate().minusDays(1));
   }
   request.setAttribute("search", search);
   return mapping.findForward("candidates");
 }
Example #15
  public static void createIndexDrdFile(
      byte versionId,
      File inDir,
      GenericIndexed<String> availableDimensions,
      GenericIndexed<String> availableMetrics,
      Interval dataInterval)
      throws IOException {
    File indexFile = new File(inDir, "index.drd");

    FileChannel channel = null;
    try {
      channel = new FileOutputStream(indexFile).getChannel();
      channel.write(ByteBuffer.wrap(new byte[] {versionId}));

      availableDimensions.writeToChannel(channel);
      availableMetrics.writeToChannel(channel);
      serializerUtils.writeString(
          channel, String.format("%s/%s", dataInterval.getStart(), dataInterval.getEnd()));
    } finally {
      Closeables.closeQuietly(channel);
      channel = null;
    }
    IndexIO.checkFileSize(indexFile);
  }
 protected LogInterval visibleIntervalFor(Rectangle clipBounds) {
   Interval interval = uiTimeScale.viewToModel(clipBounds);
   return new LogInterval(
       new LogInstant(interval.getStart().getMillis() - 1, 0),
       new LogInstant(interval.getEnd().getMillis() + 1, Integer.MAX_VALUE));
 }
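LogInterval and LogInstant are project-specific; in plain Joda-Time terms the method widens the visible interval by one millisecond on each side so entries exactly on the clip boundary are kept. A hedged equivalent with illustrative dates:

 import org.joda.time.DateTime;
 import org.joda.time.Interval;

 Interval visible = new Interval(DateTime.parse("2020-01-01T00:00Z"), DateTime.parse("2020-01-02T00:00Z"));
 Interval widened =
     new Interval(visible.getStart().minusMillis(1), visible.getEnd().plusMillis(1));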
  /**
   * @param timeline the timeline to add to
   * @param key the interval key at which to attempt the insertion
   * @param entry the entry to insert
   * @return boolean flag indicating whether we inserted or discarded something
   */
  private boolean addAtKey(
      NavigableMap<Interval, TimelineEntry> timeline, Interval key, TimelineEntry entry) {
    boolean retVal = false;
    Interval currKey = key;
    Interval entryInterval = entry.getTrueInterval();

    if (!currKey.overlaps(entryInterval)) {
      return false;
    }

    while (entryInterval != null && currKey != null && currKey.overlaps(entryInterval)) {
      Interval nextKey = timeline.higherKey(currKey);

      int versionCompare =
          versionComparator.compare(entry.getVersion(), timeline.get(currKey).getVersion());

      if (versionCompare < 0) {
        if (currKey.contains(entryInterval)) {
          return true;
        } else if (currKey.getStart().isBefore(entryInterval.getStart())) {
          entryInterval = new Interval(currKey.getEnd(), entryInterval.getEnd());
        } else {
          addIntervalToTimeline(
              new Interval(entryInterval.getStart(), currKey.getStart()), entry, timeline);

          if (entryInterval.getEnd().isAfter(currKey.getEnd())) {
            entryInterval = new Interval(currKey.getEnd(), entryInterval.getEnd());
          } else {
            entryInterval = null; // discard this entry
          }
        }
      } else if (versionCompare > 0) {
        TimelineEntry oldEntry = timeline.remove(currKey);

        if (currKey.contains(entryInterval)) {
          addIntervalToTimeline(
              new Interval(currKey.getStart(), entryInterval.getStart()), oldEntry, timeline);
          addIntervalToTimeline(
              new Interval(entryInterval.getEnd(), currKey.getEnd()), oldEntry, timeline);
          addIntervalToTimeline(entryInterval, entry, timeline);

          return true;
        } else if (currKey.getStart().isBefore(entryInterval.getStart())) {
          addIntervalToTimeline(
              new Interval(currKey.getStart(), entryInterval.getStart()), oldEntry, timeline);
        } else if (entryInterval.getEnd().isBefore(currKey.getEnd())) {
          addIntervalToTimeline(
              new Interval(entryInterval.getEnd(), currKey.getEnd()), oldEntry, timeline);
        }
      } else {
        if (timeline.get(currKey).equals(entry)) {
          // This occurs when restoring segments
          timeline.remove(currKey);
        } else {
          throw new UnsupportedOperationException(
              String.format(
                  "Cannot add overlapping segments [%s and %s] with the same version [%s]",
                  currKey, entryInterval, entry.getVersion()));
        }
      }

      currKey = nextKey;
      retVal = true;
    }

    addIntervalToTimeline(entryInterval, entry, timeline);

    return retVal;
  }
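A small Joda-Time sketch (illustrative intervals, not from the original source) of the overlap arithmetic the timeline code performs by pairing endpoints; Joda's Interval.overlap() computes the shared region directly, and the remainders are built from the endpoints much as addAtKey does.

 import org.joda.time.DateTime;
 import org.joda.time.Interval;

 Interval existing = new Interval(DateTime.parse("2020-01-10T00:00Z"), DateTime.parse("2020-01-20T00:00Z"));
 Interval incoming = new Interval(DateTime.parse("2020-01-05T00:00Z"), DateTime.parse("2020-01-15T00:00Z"));
 Interval shared = incoming.overlap(existing);                                    // 2020-01-10/2020-01-15
 Interval leftRemainder = new Interval(incoming.getStart(), existing.getStart()); // 2020-01-05/2020-01-10
 Interval rightRemainder = new Interval(incoming.getEnd(), existing.getEnd());    // 2020-01-15/2020-01-20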
  public boolean run() {
    try {
      /*
       * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
       * in the final segment.
       */

      if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
        throw new ISE(
            "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
            config.getPartitionsSpec());
      }

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        final Job groupByJob =
            Job.getInstance(
                new Configuration(),
                String.format(
                    "%s-determine_partitions_groupby-%s",
                    config.getDataSource(), config.getIntervals()));

        JobHelper.injectSystemProperties(groupByJob);
        config.addJobProperties(groupByJob);

        groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
        groupByJob.setMapOutputKeyClass(BytesWritable.class);
        groupByJob.setMapOutputValueClass(NullWritable.class);
        groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
        groupByJob.setOutputKeyClass(BytesWritable.class);
        groupByJob.setOutputValueClass(NullWritable.class);
        groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        JobHelper.setupClasspath(
            JobHelper.distributedClassPath(config.getWorkingPath()),
            JobHelper.distributedClassPath(config.makeIntermediatePath()),
            groupByJob);

        config.addInputPaths(groupByJob);
        config.intoConfiguration(groupByJob);
        FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

        groupByJob.submit();
        log.info(
            "Job %s submitted, status available at: %s",
            groupByJob.getJobName(), groupByJob.getTrackingURL());

        if (!groupByJob.waitForCompletion(true)) {
          log.error("Job failed: %s", groupByJob.getJobID());
          return false;
        }
      } else {
        log.info("Skipping group-by job.");
      }

      /*
       * Read grouped data and determine appropriate partitions.
       */
      final Job dimSelectionJob =
          Job.getInstance(
              new Configuration(),
              String.format(
                  "%s-determine_partitions_dimselection-%s",
                  config.getDataSource(), config.getIntervals()));

      dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

      JobHelper.injectSystemProperties(dimSelectionJob);
      config.addJobProperties(dimSelectionJob);

      if (!config.getPartitionsSpec().isAssumeGrouped()) {
        // Read grouped data from the groupByJob.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
        dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
      } else {
        // Directly read the source data, since we assume it's already grouped.
        dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
        config.addInputPaths(dimSelectionJob);
      }

      SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
      dimSelectionJob.setMapOutputValueClass(Text.class);
      dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
      dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
      dimSelectionJob.setOutputKeyClass(BytesWritable.class);
      dimSelectionJob.setOutputValueClass(Text.class);
      dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
      dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
      dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
      JobHelper.setupClasspath(
          JobHelper.distributedClassPath(config.getWorkingPath()),
          JobHelper.distributedClassPath(config.makeIntermediatePath()),
          dimSelectionJob);

      config.intoConfiguration(dimSelectionJob);
      FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

      dimSelectionJob.submit();
      log.info(
          "Job %s submitted, status available at: %s",
          dimSelectionJob.getJobName(), dimSelectionJob.getTrackingURL());

      if (!dimSelectionJob.waitForCompletion(true)) {
        log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
        return false;
      }

      /*
       * Load partitions determined by the previous job.
       */

      log.info(
          "Job completed, loading up partitions for intervals[%s].",
          config.getSegmentGranularIntervals());
      FileSystem fileSystem = null;
      Map<DateTime, List<HadoopyShardSpec>> shardSpecs =
          Maps.newTreeMap(DateTimeComparator.getInstance());
      int shardCount = 0;
      for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
        if (fileSystem == null) {
          fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
        }
        if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
          List<ShardSpec> specs =
              config.JSON_MAPPER.readValue(
                  Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                  new TypeReference<List<ShardSpec>>() {});

          List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
          for (int i = 0; i < specs.size(); ++i) {
            actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
            log.info(
                "DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
          }

          shardSpecs.put(segmentGranularity.getStart(), actualSpecs);
        } else {
          log.info("Path[%s] didn't exist!?", partitionInfoPath);
        }
      }
      config.setShardSpecs(shardSpecs);

      return true;
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
  }
Example #19
 public void setInterval(Interval interval) {
   if (getType().isAbsence()) {
     this.end = interval.getEnd().toDate();
     this.start = interval.getStart().toDate();
    } else {
      throw new IllegalStateException("Intervals only valid for absences");
    }
 }
 @Override
 public String write(Interval arg0) throws YamlException {
   return dateTimeMapper.write(arg0.getStart()) + "|" + dateTimeMapper.write(arg0.getEnd());
 }
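A hedged sketch of the matching read(): the real dateTimeMapper format is not shown, so ISO-8601 parsing via DateTime.parse() is assumed here.

 import org.joda.time.DateTime;
 import org.joda.time.Interval;

 public Interval read(String value) {
   // Inverse of write(): split on the '|' delimiter and parse both halves.
   String[] parts = value.split("\\|", 2);
   return new Interval(DateTime.parse(parts[0]), DateTime.parse(parts[1]));
 }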
Example #21
  @Override
  public Sequence<T> run(final Query<T> query) {
    final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
    final CacheStrategy<T, Object, Query<T>> strategy = toolChest.getCacheStrategy(query);

    final Map<DruidServer, List<SegmentDescriptor>> serverSegments = Maps.newTreeMap();

    final List<Pair<DateTime, byte[]>> cachedResults = Lists.newArrayList();
    final Map<String, CachePopulator> cachePopulatorMap = Maps.newHashMap();

    final boolean useCache =
        Boolean.parseBoolean(query.getContextValue("useCache", "true")) && strategy != null;
    final boolean populateCache =
        Boolean.parseBoolean(query.getContextValue("populateCache", "true")) && strategy != null;
    final boolean isBySegment = Boolean.parseBoolean(query.getContextValue("bySegment", "false"));

    ImmutableMap.Builder<String, String> contextBuilder = new ImmutableMap.Builder<>();

    final String priority = query.getContextValue("priority", "0");
    contextBuilder.put("priority", priority);

    if (populateCache) {
      contextBuilder.put("bySegment", "true");
    }
    contextBuilder.put("intermediate", "true");

    final Query<T> rewrittenQuery = query.withOverriddenContext(contextBuilder.build());

    VersionedIntervalTimeline<String, ServerSelector> timeline =
        serverView.getTimeline(query.getDataSource());
    if (timeline == null) {
      return Sequences.empty();
    }

    // build set of segments to query
    Set<Pair<ServerSelector, SegmentDescriptor>> segments = Sets.newLinkedHashSet();

    List<TimelineObjectHolder<String, ServerSelector>> serversLookup = Lists.newLinkedList();

    for (Interval interval : rewrittenQuery.getIntervals()) {
      serversLookup.addAll(timeline.lookup(interval));
    }

    // Let tool chest filter out unneeded segments
    final List<TimelineObjectHolder<String, ServerSelector>> filteredServersLookup =
        toolChest.filterSegments(query, serversLookup);

    for (TimelineObjectHolder<String, ServerSelector> holder : filteredServersLookup) {
      for (PartitionChunk<ServerSelector> chunk : holder.getObject()) {
        ServerSelector selector = chunk.getObject();
        final SegmentDescriptor descriptor =
            new SegmentDescriptor(
                holder.getInterval(), holder.getVersion(), chunk.getChunkNumber());

        segments.add(Pair.of(selector, descriptor));
      }
    }

    final byte[] queryCacheKey;
    if (strategy != null) {
      queryCacheKey = strategy.computeCacheKey(query);
    } else {
      queryCacheKey = null;
    }

    if (queryCacheKey != null) {
      Map<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> cacheKeys = Maps.newHashMap();
      for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
        final Cache.NamedKey segmentCacheKey =
            computeSegmentCacheKey(
                segment.lhs.getSegment().getIdentifier(), segment.rhs, queryCacheKey);
        cacheKeys.put(segment, segmentCacheKey);
      }

      // Pull cached segments from cache and remove from set of segments to query
      final Map<Cache.NamedKey, byte[]> cachedValues;
      if (useCache) {
        cachedValues = cache.getBulk(cacheKeys.values());
      } else {
        cachedValues = ImmutableMap.of();
      }

      for (Map.Entry<Pair<ServerSelector, SegmentDescriptor>, Cache.NamedKey> entry :
          cacheKeys.entrySet()) {
        Pair<ServerSelector, SegmentDescriptor> segment = entry.getKey();
        Cache.NamedKey segmentCacheKey = entry.getValue();
        final Interval segmentQueryInterval = segment.rhs.getInterval();

        final byte[] cachedValue = cachedValues.get(segmentCacheKey);
        if (cachedValue != null) {
          // remove cached segment from set of segments to query
          segments.remove(segment);
          cachedResults.add(Pair.of(segmentQueryInterval.getStart(), cachedValue));
        } else if (populateCache) {
          final String segmentIdentifier = segment.lhs.getSegment().getIdentifier();
          cachePopulatorMap.put(
              String.format("%s_%s", segmentIdentifier, segmentQueryInterval),
              new CachePopulator(cache, objectMapper, segmentCacheKey));
        }
      }
    }

    // Compile list of all segments not pulled from cache
    for (Pair<ServerSelector, SegmentDescriptor> segment : segments) {
      final QueryableDruidServer queryableDruidServer = segment.lhs.pick();

      if (queryableDruidServer == null) {
        log.error("No servers found for %s?! How can this be?!", segment.rhs);
      } else {
        final DruidServer server = queryableDruidServer.getServer();
        List<SegmentDescriptor> descriptors = serverSegments.get(server);

        if (descriptors == null) {
          descriptors = Lists.newArrayList();
          serverSegments.put(server, descriptors);
        }

        descriptors.add(segment.rhs);
      }
    }

    return new LazySequence<>(
        new Supplier<Sequence<T>>() {
          @Override
          public Sequence<T> get() {
            ArrayList<Pair<DateTime, Sequence<T>>> listOfSequences = Lists.newArrayList();

            addSequencesFromServer(listOfSequences);
            addSequencesFromCache(listOfSequences);

            Collections.sort(
                listOfSequences,
                Ordering.natural().onResultOf(Pair.<DateTime, Sequence<T>>lhsFn()));

            final Sequence<Sequence<T>> seq =
                Sequences.simple(
                    Iterables.transform(listOfSequences, Pair.<DateTime, Sequence<T>>rhsFn()));
            if (strategy == null) {
              return toolChest.mergeSequences(seq);
            } else {
              return strategy.mergeSequences(seq);
            }
          }

          private void addSequencesFromCache(
              ArrayList<Pair<DateTime, Sequence<T>>> listOfSequences) {
            if (strategy == null) {
              return;
            }

            final Function<Object, T> pullFromCacheFunction = strategy.pullFromCache();
            final TypeReference<Object> cacheObjectClazz = strategy.getCacheObjectClazz();
            for (Pair<DateTime, byte[]> cachedResultPair : cachedResults) {
              final byte[] cachedResult = cachedResultPair.rhs;
              Sequence<Object> cachedSequence =
                  new BaseSequence<>(
                      new BaseSequence.IteratorMaker<Object, Iterator<Object>>() {
                        @Override
                        public Iterator<Object> make() {
                          try {
                            if (cachedResult.length == 0) {
                              return Iterators.emptyIterator();
                            }

                            return objectMapper.readValues(
                                objectMapper.getFactory().createParser(cachedResult),
                                cacheObjectClazz);
                          } catch (IOException e) {
                            throw Throwables.propagate(e);
                          }
                        }

                        @Override
                        public void cleanup(Iterator<Object> iterFromMake) {}
                      });
              listOfSequences.add(
                  Pair.of(
                      cachedResultPair.lhs, Sequences.map(cachedSequence, pullFromCacheFunction)));
            }
          }

          @SuppressWarnings("unchecked")
          private void addSequencesFromServer(
              ArrayList<Pair<DateTime, Sequence<T>>> listOfSequences) {
            for (Map.Entry<DruidServer, List<SegmentDescriptor>> entry :
                serverSegments.entrySet()) {
              final DruidServer server = entry.getKey();
              final List<SegmentDescriptor> descriptors = entry.getValue();

              final QueryRunner clientQueryable = serverView.getQueryRunner(server);
              if (clientQueryable == null) {
                log.makeAlert("WTF!? server[%s] doesn't have a client Queryable?", server).emit();
                continue;
              }

              final Sequence<T> resultSeqToAdd;
              final MultipleSpecificSegmentSpec segmentSpec =
                  new MultipleSpecificSegmentSpec(descriptors);
              List<Interval> intervals = segmentSpec.getIntervals();

              if (!server.isAssignable() || !populateCache || isBySegment) {
                resultSeqToAdd = clientQueryable.run(query.withQuerySegmentSpec(segmentSpec));
              } else {
                resultSeqToAdd =
                    toolChest.mergeSequences(
                        Sequences.map(
                            clientQueryable.run(rewrittenQuery.withQuerySegmentSpec(segmentSpec)),
                            new Function<Object, Sequence<T>>() {
                              private final Function<T, Object> prepareForCache =
                                  strategy.prepareForCache();

                              @Override
                              public Sequence<T> apply(Object input) {
                                Result<Object> result = (Result<Object>) input;
                                final BySegmentResultValueClass<T> value =
                                    (BySegmentResultValueClass<T>) result.getValue();
                                String segmentIdentifier = value.getSegmentId();
                                final Iterable<T> segmentResults = value.getResults();

                                CachePopulator cachePopulator =
                                    cachePopulatorMap.get(
                                        String.format(
                                            "%s_%s", segmentIdentifier, value.getInterval()));
                                if (cachePopulator != null) {
                                  cachePopulator.populate(
                                      Iterables.transform(segmentResults, prepareForCache));
                                }

                                return Sequences.simple(
                                    Iterables.transform(
                                        segmentResults,
                                        toolChest.makeMetricManipulatorFn(
                                            rewrittenQuery,
                                            new MetricManipulationFn() {
                                              @Override
                                              public Object manipulate(
                                                  AggregatorFactory factory, Object object) {
                                                return factory.deserialize(object);
                                              }
                                            })));
                              }
                            }));
              }

              listOfSequences.add(Pair.of(intervals.get(0).getStart(), resultSeqToAdd));
            }
          }
        });
  }