private static void setRecordCount(State state, Job job) {
    Counters counters = null;
    try {
      counters = job.getCounters();
    } catch (IOException e) {
      LOG.info("Failed to get job counters. Record count will not be set. ", e);
      return;
    }

    Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
      state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
      return;
    }

    recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
      state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
      return;
    }

    LOG.info("Non zero record count not found in both mapper and reducer counters");
  }
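  // Hedged sketch (not from the original source): RECORD_COUNT above is a standard
  // Hadoop job counter, typically incremented once per record in the mapper or reducer.
  // EVENT_COUNTER is an illustrative enum name; assumes org.apache.hadoop.mapreduce imports.
  public static class CountingMapper extends Mapper<LongWritable, Text, Text, Text> {
    public enum EVENT_COUNTER { RECORD_COUNT }

    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // Each processed record bumps the counter that setRecordCount() later reads.
      context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1L);
      context.write(value, value);
    }
  }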
  /**
   * Getter for the proxied file system, using the passed parameters to create an instance of the
   * proxied {@link FileSystem}.
   *
   * @param properties state properties, which must contain the proxy user name.
   * @param authType the authentication type, either TOKEN or KEYTAB.
   * @param authPath the keytab location if the authType is KEYTAB; otherwise, the token file
   *     location.
   * @param uri the file system URI.
   * @throws IOException
   * @throws InterruptedException
   * @throws URISyntaxException
   * @return the proxied {@link FileSystem}
   */
  public FileSystem getProxiedFileSystem(
      State properties, AuthType authType, String authPath, String uri)
      throws IOException, InterruptedException, URISyntaxException {
    Preconditions.checkArgument(
        StringUtils.isNotBlank(properties.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME)),
        "State does not contain a proper proxy user name");
    String proxyUserName = properties.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME);
    UserGroupInformation proxyUser;
    switch (authType) {
      case KEYTAB: // If the authentication type is KEYTAB, log in a super user first before
                   // creating a proxy user.
        Preconditions.checkArgument(
            StringUtils.isNotBlank(
                properties.getProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS)),
            "State does not contain a proper proxy token file name");
        String superUser = properties.getProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS);
        UserGroupInformation.loginUserFromKeytab(superUser, authPath);
        proxyUser =
            UserGroupInformation.createProxyUser(
                proxyUserName, UserGroupInformation.getLoginUser());
        break;
      case TOKEN: // If the authentication type is TOKEN, create a proxy user and then add the token
                  // to the user.
        proxyUser =
            UserGroupInformation.createProxyUser(
                proxyUserName, UserGroupInformation.getLoginUser());
        Optional<Token> proxyToken = this.getTokenFromSeqFile(authPath, proxyUserName);
        if (proxyToken.isPresent()) {
          proxyUser.addToken(proxyToken.get());
        } else {
          LOG.warn("No delegation token found for the current proxy user.");
        }
        break;
      default:
        LOG.warn(
            "Creating a proxy user without authentication; it may not be able to perform file system operations.");
        proxyUser =
            UserGroupInformation.createProxyUser(
                proxyUserName, UserGroupInformation.getLoginUser());
        break;
    }

    final Configuration conf = new Configuration();
    JobConfigurationUtils.putStateIntoConfiguration(properties, conf);
    final URI fsURI = URI.create(uri);
    proxyUser.doAs(
        new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws IOException {
            LOG.debug(
                "Now performing file system operations as :"
                    + UserGroupInformation.getCurrentUser());
            proxiedFs = FileSystem.get(fsURI, conf);
            return null;
          }
        });
    return this.proxiedFs;
  }
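  // Hedged usage sketch (illustrative values only; "wrapper" stands in for an instance
  // of the class declaring getProxiedFileSystem): proxy as "etl_user" using a delegation
  // token read from a sequence file.
  State state = new State();
  state.setProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME, "etl_user");
  FileSystem fs =
      wrapper.getProxiedFileSystem(state, AuthType.TOKEN, "/security/token.seq", "hdfs://namenode:8020");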
 private static Set<String> getUniquePathsToRegister(Collection<? extends WorkUnitState> states) {
   Set<String> paths = Sets.newHashSet();
   for (State state : states) {
     if (state.contains(ConfigurationKeys.PUBLISHER_DIRS)) {
       paths.addAll(state.getPropAsList(ConfigurationKeys.PUBLISHER_DIRS));
     }
   }
   return paths;
 }
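 // Hedged sketch: PUBLISHER_DIRS is read via getPropAsList, which in Gobblin's State
 // splits a comma-separated value (an assumption worth verifying for your version);
 // duplicate directories across states collapse into the returned set.
 WorkUnitState wus = new WorkUnitState();
 wus.setProp(ConfigurationKeys.PUBLISHER_DIRS, "/data/out/tableA,/data/out/tableB");
 Set<String> unique = getUniquePathsToRegister(ImmutableList.of(wus));
 // unique now contains "/data/out/tableA" and "/data/out/tableB"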
  private static void setOutputDedupeStatus(State state) {
    if (state.getPropAsBoolean(
        MRCompactor.COMPACTION_OUTPUT_DEDUPLICATED,
        MRCompactor.DEFAULT_COMPACTION_OUTPUT_DEDUPLICATED)) {
      state.setProp(SlaEventKeys.DEDUPE_STATUS_KEY, DedupeStatus.DEDUPED);
    } else {
      state.setProp(SlaEventKeys.DEDUPE_STATUS_KEY, DedupeStatus.NOT_DEDUPED);
    }
  }
  public BaseDataPublisher(State state) throws IOException {
    super(state);
    this.closer = Closer.create();
    Configuration conf = new Configuration();

    // Add all job configuration properties so they are picked up by Hadoop
    for (String key : this.getState().getPropertyNames()) {
      conf.set(key, this.getState().getProp(key));
    }

    this.numBranches = this.getState().getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);

    this.fileSystemByBranches = Lists.newArrayListWithCapacity(this.numBranches);
    this.publisherFinalDirOwnerGroupsByBranches = Lists.newArrayListWithCapacity(this.numBranches);
    this.permissions = Lists.newArrayListWithCapacity(this.numBranches);

    // Get a FileSystem instance for each branch
    for (int i = 0; i < this.numBranches; i++) {
      URI uri =
          URI.create(
              this.getState()
                  .getProp(
                      ForkOperatorUtils.getPropertyNameForBranch(
                          ConfigurationKeys.WRITER_FILE_SYSTEM_URI, this.numBranches, i),
                      ConfigurationKeys.LOCAL_FS_URI));
      this.fileSystemByBranches.add(FileSystem.get(uri, conf));

      // The group(s) will be applied to the final publisher output directory(ies)
      this.publisherFinalDirOwnerGroupsByBranches.add(
          Optional.fromNullable(
              this.getState()
                  .getProp(
                      ForkOperatorUtils.getPropertyNameForBranch(
                          ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR_GROUP, this.numBranches, i))));

      // The permission(s) will be applied to all directories created by the publisher,
      // which do NOT include directories created by the writer and moved by the publisher.
      // The permissions of those directories are controlled by writer.file.permissions and
      // writer.dir.permissions.
      this.permissions.add(
          new FsPermission(
              state.getPropAsShortWithRadix(
                  ForkOperatorUtils.getPropertyNameForBranch(
                      ConfigurationKeys.DATA_PUBLISHER_PERMISSIONS, numBranches, i),
                  FsPermission.getDefault().toShort(),
                  ConfigurationKeys.PERMISSION_PARSING_RADIX)));
    }

    this.parallelRunnerThreads =
        state.getPropAsInt(
            ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
            ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS);
  }
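  // Hedged sketch of the per-branch key convention the constructor above relies on:
  // with more than one fork branch, ForkOperatorUtils.getPropertyNameForBranch is
  // assumed to resolve a key to "<key>.<branchId>" (single-branch jobs use the bare key).
  // Illustrative values only.
  State state = new State();
  state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, 2);
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI + ".0", "hdfs://cluster-a:8020");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI + ".1", "file:///");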
 public String getDefaultEventBusId() {
   State destinationCfg = getDestination().getProperties();
   String eventBusIdKey =
       ForkOperatorUtils.getPathForBranch(
           destinationCfg, FULL_EVENTBUSID_KEY, getBranches(), getBranch());
   if (destinationCfg.contains(eventBusIdKey)) {
     return destinationCfg.getProp(eventBusIdKey);
   } else {
     return WriterUtils.getWriterOutputDir(destinationCfg, getBranches(), getBranch())
         .toString();
   }
 }
 @Test
 public void testGetBranchName() {
   State state = new State();
   state.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".0", FORK_BRANCH_NAME_0);
   state.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".1", FORK_BRANCH_NAME_1);
   Assert.assertEquals(
       ForkOperatorUtils.getBranchName(state, 0, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 0),
       FORK_BRANCH_NAME_0);
   Assert.assertEquals(
       ForkOperatorUtils.getBranchName(state, 1, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 1),
       FORK_BRANCH_NAME_1);
   Assert.assertEquals(
       ForkOperatorUtils.getBranchName(state, 2, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 2),
       ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 2);
 }
 private static void addRuntimeHiveRegistrationProperties(State state) {
  // Use seconds instead of milliseconds to be consistent with other times stored in Hive
   state.appendToListProp(
       HiveRegProps.HIVE_TABLE_PARTITION_PROPS,
       String.format(
           "%s:%d",
           DATA_PUBLISH_TIME,
           TimeUnit.SECONDS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS)));
 }
 /**
  * Initialize file system helper at most once for this instance. {@inheritDoc}
  *
  * @see
  *     gobblin.source.extractor.filebased.FileBasedSource#initFileSystemHelper(gobblin.configuration.State)
  */
 @Override
 public synchronized void initFileSystemHelper(State state) throws FileBasedHelperException {
   if (fsHelper == null) {
     Credential credential = GoogleCommon.newSourceCredential(state);
     Drive driveClient =
         new Drive.Builder(credential.getTransport(), GoogleCommon.getJsonFactory(), credential)
             .setApplicationName(
                 Preconditions.checkNotNull(
                     state.getProp(APPLICATION_NAME), "ApplicationName is required"))
             .build();
     this.fsHelper = closer.register(new GoogleDriveFsHelper(state, driveClient));
   }
 }
  /**
   * Creates a {@link gobblin.metrics.MetricContext}. Tries to read the name of the parent context
   * from the key "metrics.context.name" in the state, and tries to get the parent context by name
   * from the {@link gobblin.metrics.MetricContext} registry (the parent context must be
   * registered).
   *
   * <p>Automatically adds two tags to the child context:
   *
   * <ul>
   *   <li>component: attempts to determine which component type within gobblin-api generated this
   *       instance.
   *   <li>class: the specific class of the object that generated this instance of Instrumented
   * </ul>
   */
  public MetricContext getMetricContext(State state, Class<?> klazz, List<Tag<?>> tags) {
    int randomId = new Random().nextInt(Integer.MAX_VALUE);

    List<Tag<?>> generatedTags = Lists.newArrayList();

    if (!klazz.isAnonymousClass()) {
      generatedTags.add(new Tag<>("class", klazz.getCanonicalName()));
    }

    Optional<GobblinMetrics> gobblinMetrics =
        state.contains(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY)
            ? GobblinMetricsRegistry.getInstance()
                .get(state.getProp(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY))
            : Optional.<GobblinMetrics>absent();

    MetricContext.Builder builder =
        gobblinMetrics.isPresent()
            ? gobblinMetrics
                .get()
                .getMetricContext()
                .childBuilder(klazz.getCanonicalName() + "." + randomId)
            : MetricContext.builder(klazz.getCanonicalName() + "." + randomId);
    return builder.addTags(generatedTags).addTags(tags).build();
  }
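  // Hedged usage sketch (illustrative names; MyExtractor and "instrumented" are
  // hypothetical): build a metric context for a class, attaching one extra tag.
  State state = new State();
  state.setProp(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY, "myParentContext");
  List<Tag<?>> extraTags = Lists.<Tag<?>>newArrayList(new Tag<>("jobId", "job_123"));
  MetricContext context = instrumented.getMetricContext(state, MyExtractor.class, extraTags);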
  /**
   * Provides a snapshot of the files, where the snapshot consists of a list of file IDs paired
   * with their modification times. The folder ID and file ID are both optional: a missing folder
   * ID means the search starts from the root folder, and a missing file ID means all files in the
   * current folder and its subfolders are included.
   *
   * <p>{@inheritDoc}
   *
   * @see
   *     gobblin.source.extractor.filebased.FileBasedSource#getcurrentFsSnapshot(gobblin.configuration.State)
   */
  @Override
  public List<String> getcurrentFsSnapshot(State state) {
    List<String> results = new ArrayList<>();

    String folderId = state.getProp(SOURCE_FILEBASED_DATA_DIRECTORY, "");

    try {
      LOG.info("Running ls with folderId: " + folderId);
      List<String> fileIds = this.fsHelper.ls(folderId);
      for (String fileId : fileIds) {
        results.add(fileId + splitPattern + this.fsHelper.getFileMTime(fileId));
      }
    } catch (FileBasedHelperException e) {
      throw new RuntimeException(
          "Failed to retrieve list of file IDs for folderID: " + folderId, e);
    }
    return results;
  }
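  // Hedged sketch: each entry produced above has the form "<fileId><splitPattern><mtime>".
  // Assuming splitPattern is also safe to use as a regex, an entry can be decomposed:
  String entry = "someFileId" + splitPattern + "1700000000000";
  String[] parts = entry.split(splitPattern);
  String fileId = parts[0];
  long modifiedTime = Long.parseLong(parts[1]);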
  @BeforeClass
  @SuppressWarnings("unchecked")
  public void setUp() throws Exception {
    // Create the staging and output dirs if they don't already exist
    File stagingDir = new File(TestConstants.TEST_STAGING_DIR);
    File outputDir = new File(TestConstants.TEST_OUTPUT_DIR);
    if (!stagingDir.exists()) {
      stagingDir.mkdirs();
    }
    if (!outputDir.exists()) {
      outputDir.mkdirs();
    }

    this.schema = new Schema.Parser().parse(TestConstants.AVRO_SCHEMA);

    this.filePath =
        TestConstants.TEST_EXTRACT_NAMESPACE.replaceAll("\\.", "/")
            + "/"
            + TestConstants.TEST_EXTRACT_TABLE
            + "/"
            + TestConstants.TEST_EXTRACT_ID
            + "_"
            + TestConstants.TEST_EXTRACT_PULL_TYPE;

    State properties = new State();
    properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
    properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, TestConstants.TEST_FS_URI);
    properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, TestConstants.TEST_STAGING_DIR);
    properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, TestConstants.TEST_OUTPUT_DIR);
    properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, this.filePath);
    properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestConstants.TEST_FILE_NAME);

    // Build a writer to write test records
    this.writer =
        new AvroDataWriterBuilder()
            .writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
            .writeInFormat(WriterOutputFormat.AVRO)
            .withWriterId(TestConstants.TEST_WRITER_ID)
            .withSchema(this.schema)
            .forBranch(-1)
            .build();
  }
 public static void setUpstreamTimeStamp(State state, long time) {
   state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, Long.toString(time));
 }