Ejemplo n.º 1
0
  /**
   * Merges the items read from {@code sourceA} and {@code sourceB} into {@code sink}.
   *
   * <p>At each step the smaller head item (according to {@code comparator}) is written and its
   * source advanced; when the two heads compare equal, both are written (A first, then B) and
   * both sources advance. Once one source is exhausted the remainder of the other is copied
   * through unchanged. A source is treated as exhausted when {@code hasNext()} is false or when
   * {@code read()} returns {@code null}.
   *
   * <p>NOTE(review): the result is only ordered if both sources are already ordered consistently
   * with {@code comparator} — confirm against callers.
   *
   * <p>The counter advances once per step, so a pair of equal items counts as one; a progress
   * message is published every 1,000,000 steps and once more at the end. The sink is flushed
   * (when it is {@link Flushable}) <em>before</em> the state changes to {@code COMPLETED}, so
   * progress listeners never observe a completed task whose output is still buffered, and a
   * failing flush is not reported as success.
   *
   * @throws Exception if reading, comparing, writing or flushing fails
   */
  @Override
  protected void runTask() throws Exception {

    progress.setState(State.RUNNING);

    int mergeCount = 0;

    T a = sourceA.hasNext() ? sourceA.read() : null;
    T b = sourceB.hasNext() ? sourceB.read() : null;

    while (a != null || b != null) {
      // An exhausted side always loses, so the other side is drained without
      // consulting the comparator.
      final int c;
      if (b == null) {
        c = -1;
      } else if (a == null) {
        c = 1;
      } else {
        c = comparator.compare(a, b);
      }

      final boolean takeA = c <= 0;
      final boolean takeB = c >= 0;

      // Write before reading so that, for equal items, the order is
      // write(a), write(b), read A, read B.
      if (takeA) {
        sink.write(a);
      }
      if (takeB) {
        sink.write(b);
      }
      if (takeA) {
        a = sourceA.hasNext() ? sourceA.read() : null;
      }
      if (takeB) {
        b = sourceB.hasNext() ? sourceB.read() : null;
      }
      ++mergeCount;

      if (mergeCount % 1000000 == 0) {
        progress.setMessage(MessageFormat.format("Merged {0} unique items.", mergeCount));
      }
    }

    // Push any buffered output out before announcing completion.
    if (sink instanceof Flushable) {
      ((Flushable) sink).flush();
    }

    // Group the final message and the state change between startAdjusting/endAdjusting.
    progress.startAdjusting();
    progress.setMessage(MessageFormat.format("Merged {0} unique items.", mergeCount));
    progress.setState(State.COMPLETED);
    progress.endAdjusting();
  }
Ejemplo n.º 2
0
  /**
   * Runs the filtering command.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Registers the feature, entry and event filters selected by the {@code filter*}
   *       options (minimum frequency, pattern, whitelist file).
   *   <li>Calls {@code checkState()} and makes the input files the initial "active" files.
   *   <li>Repeats filtering passes (entries, events, features, then events and entries again)
   *       while any of the {@code *FilterRequired} flags is set. Presumably the
   *       {@code filter*()} methods update those flags and the {@code active*File} fields —
   *       they are not visible here.
   *   <li>Publishes the active entries, events and features files to the output locations.
   *   <li>Saves and closes the enumerator if it is open, then marks the progress as completed.
   * </ol>
   *
   * <p>Publishing never moves or deletes an input file, and leaves a file untouched when it is
   * already at its output location.
   *
   * @throws Exception if reading a whitelist, a filtering step, or publishing a file fails
   */
  @Override
  public void runCommand() throws Exception {
    if (LOG.isInfoEnabled()) {
      LOG.info("Running filtering.");
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug(this);
    }

    if (filterFeatureMinFreq > 0) {
      addFeaturesMinimumFrequency(filterFeatureMinFreq);
    }
    if (filterFeaturePattern != null) {
      addFeaturesPattern(filterFeaturePattern);
    }
    if (filterFeatureWhitelist != null) {
      addFeaturesWhitelist(
          com.google.common.io.Files.readLines(filterFeatureWhitelist, getCharset()));
    }

    if (filterEntryMinFreq > 0) {
      addEntryMinimumFrequency(filterEntryMinFreq);
    }
    if (filterEntryPattern != null) {
      addEntryPattern(filterEntryPattern);
    }
    if (filterEntryWhitelist != null) {
      addEntryWhitelist(com.google.common.io.Files.readLines(filterEntryWhitelist, getCharset()));
    }

    if (filterEventMinFreq > 0) {
      addEventMinimumFrequency(filterEventMinFreq);
    }

    checkState();
    activeEventsFile = inputEventsFile;
    activeEntriesFile = inputEntriesFile;
    activeFeaturesFile = inputFeaturesFile;

    progress.addProgressListener(
        new ProgressListener() {

          @Override
          public void progressChanged(ProgressEvent progressEvent) {
            if (LOG.isInfoEnabled()) {
              LOG.info(progressEvent.getSource().getProgressReport());
            }
          }
        });

    progress.setState(State.RUNNING);
    progress.setProgressPercent(0);

    // Run the filters forwards then backwards. Each filtering step may
    // introduce additional filters for the other files, so keep looping
    // until there is no work remaining. Depending on the filters this is
    // very unlikely to take more than 3 passes.

    int passCount = 0;
    int opCount = 0;

    while (entryFilterRequired || eventFilterRequired || featureFilterRequired) {

      progress.setMessage("Running filtering pass (#" + (++passCount) + ").");

      // Forwards: entries -> events -> features. The three copy operations
      // that follow the loop are always still pending at this point.
      if (entryFilterRequired) {
        filterEntries();
        progress.setProgressPercent(estimateProgressPercent(++opCount, 3));
      }

      if (eventFilterRequired) {
        filterEvents();
        progress.setProgressPercent(estimateProgressPercent(++opCount, 3));
      }

      if (featureFilterRequired) {
        filterFeatures();
        progress.setProgressPercent(estimateProgressPercent(++opCount, 3));
      }

      // Backwards: events -> entries.
      if (eventFilterRequired) {
        filterEvents();
        progress.setProgressPercent(estimateProgressPercent(++opCount, 3));
      }

      if (entryFilterRequired) {
        filterEntries();
        progress.setProgressPercent(estimateProgressPercent(++opCount, 3));
      }
    }

    // Finished filtering so publish the result files to the outputs.

    progress.setMessage("Copying final entries file.");
    publishResultFile(activeEntriesFile, inputEntriesFile, outputEntriesFile);
    ++opCount;

    progress.startAdjusting();
    progress.setProgressPercent(estimateProgressPercent(opCount, 2));
    progress.setMessage("Copying final events file.");
    progress.endAdjusting();

    publishResultFile(activeEventsFile, inputEventsFile, outputEventsFile);
    ++opCount;

    progress.startAdjusting();
    progress.setProgressPercent(estimateProgressPercent(opCount, 1));
    progress.setMessage("Copying final features file.");
    progress.endAdjusting();

    publishResultFile(activeFeaturesFile, inputFeaturesFile, outputFeaturesFile);
    ++opCount;
    progress.setProgressPercent(estimateProgressPercent(opCount, 0));

    if (indexDeligate.isEnumeratorOpen()) {
      indexDeligate.saveEnumerator();
      indexDeligate.closeEnumerator();
    }

    progress.setState(State.COMPLETED);
  }

  /**
   * Estimates overall progress as completed operations over completed plus expected ones.
   *
   * @param completedOps number of operations finished so far; callers pass a value of at least
   *     one, which keeps the divisor positive
   * @param pendingCopyOps number of final copy operations that are still to run
   * @return integer percentage {@code 100 * completedOps / (completedOps + pendingCopyOps +
   *     number of *FilterRequired flags currently set)}
   */
  private int estimateProgressPercent(int completedOps, int pendingCopyOps) {
    final int pendingFilterOps =
        (entryFilterRequired ? 1 : 0)
            + (eventFilterRequired ? 1 : 0)
            + (featureFilterRequired ? 1 : 0);
    return 100 * completedOps / (completedOps + pendingCopyOps + pendingFilterOps);
  }

  /**
   * Places the content of {@code active} at {@code output}.
   *
   * <p>If {@code active} already is {@code output} nothing is done. If {@code active} is the
   * original {@code input} it is copied, so the input is never moved or deleted. Otherwise
   * {@code active} is renamed to {@code output}, falling back to copy-then-delete when the
   * rename fails.
   *
   * <p>NOTE(review): paths are compared with {@code File.equals}, i.e. as abstract pathnames;
   * two different spellings of the same file are not detected.
   *
   * @param active file currently holding the result
   * @param input original input file, which must be preserved
   * @param output destination; any existing file there is replaced
   * @throws java.io.IOException if the fallback copy fails
   */
  private void publishResultFile(java.io.File active, java.io.File input, java.io.File output)
      throws java.io.IOException {
    if (active.equals(output)) {
      // Already in place; deleting the output here would destroy the only copy.
      return;
    }

    output.delete();

    if (active.equals(input)) {
      // Nothing was filtered for this file: copy rather than rename the input away.
      com.google.common.io.Files.copy(active, output);
      return;
    }

    if (!active.renameTo(output)) {
      com.google.common.io.Files.copy(active, output);
      active.delete();
    }
  }