private static void setRecordCount(State state, Job job) {
  Counters counters = null;
  try {
    counters = job.getCounters();
  } catch (IOException e) {
    LOG.info("Failed to get job counters. Record count will not be set.", e);
    return;
  }

  Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);
  if (recordCounter != null && recordCounter.getValue() != 0) {
    state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
    return;
  }

  recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);
  if (recordCounter != null && recordCounter.getValue() != 0) {
    state.setProp(SlaEventKeys.RECORD_COUNT_KEY, Long.toString(recordCounter.getValue()));
    return;
  }

  LOG.info("Non-zero record count not found in either mapper or reducer counters");
}
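// Usage sketch (illustrative; "jobState", "completedJob", and this helper are hypothetical).
// After the MR job finishes, the record count, if reported, lands in the State:
private static long readPublishedRecordCount(State jobState, Job completedJob) {
  setRecordCount(jobState, completedJob);
  // Falls back to -1 when neither the reducer nor the mapper reported a non-zero count.
  return jobState.getPropAsLong(SlaEventKeys.RECORD_COUNT_KEY, -1L);
}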
/**
 * Getter for proxiedFs, using the passed parameters to create the proxiedFs instance.
 *
 * @param properties the {@link State} containing the proxy user configuration.
 * @param authType is either TOKEN or KEYTAB.
 * @param authPath is the KEYTAB location if the authType is KEYTAB; otherwise, it is the token
 *     file.
 * @param uri File system URI.
 * @return proxiedFs
 * @throws IOException
 * @throws InterruptedException
 * @throws URISyntaxException
 */
public FileSystem getProxiedFileSystem(State properties, AuthType authType, String authPath, String uri)
    throws IOException, InterruptedException, URISyntaxException {
  Preconditions.checkArgument(
      StringUtils.isNotBlank(properties.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME)),
      "State does not contain a proper proxy user name");
  String proxyUserName = properties.getProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME);

  UserGroupInformation proxyUser;
  switch (authType) {
    case KEYTAB:
      // If the authentication type is KEYTAB, log in a super user first before
      // creating a proxy user.
      Preconditions.checkArgument(
          StringUtils.isNotBlank(
              properties.getProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS)),
          "State does not contain a proper super user name");
      String superUser = properties.getProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS);
      UserGroupInformation.loginUserFromKeytab(superUser, authPath);
      proxyUser =
          UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser());
      break;
    case TOKEN:
      // If the authentication type is TOKEN, create a proxy user and then add the token
      // to the user.
      proxyUser =
          UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser());
      Optional<Token> proxyToken = this.getTokenFromSeqFile(authPath, proxyUserName);
      if (proxyToken.isPresent()) {
        proxyUser.addToken(proxyToken.get());
      } else {
        LOG.warn("No delegation token found for the current proxy user.");
      }
      break;
    default:
      LOG.warn("Creating a proxy user without authentication, which may not be able to perform "
          + "file system operations.");
      proxyUser =
          UserGroupInformation.createProxyUser(proxyUserName, UserGroupInformation.getLoginUser());
      break;
  }

  final Configuration conf = new Configuration();
  JobConfigurationUtils.putStateIntoConfiguration(properties, conf);
  final URI fsURI = URI.create(uri);
  proxyUser.doAs(new PrivilegedExceptionAction<Void>() {
    @Override
    public Void run() throws IOException {
      LOG.debug("Now performing file system operations as: " + UserGroupInformation.getCurrentUser());
      proxiedFs = FileSystem.get(fsURI, conf);
      return null;
    }
  });
  return this.proxiedFs;
}
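// Usage sketch (illustrative; user names, keytab path, and URI are hypothetical values):
// logging in as a super user from a keytab and proxying as the configured user.
private FileSystem exampleKeytabProxy() throws Exception {
  State props = new State();
  props.setProp(ConfigurationKeys.FS_PROXY_AS_USER_NAME, "etl-proxy-user");
  props.setProp(ConfigurationKeys.SUPER_USER_NAME_TO_PROXY_AS_OTHERS, "super-user");
  return getProxiedFileSystem(props, AuthType.KEYTAB,
      "/etc/security/keytabs/super-user.keytab", "hdfs://namenode:8020");
}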
private static Set<String> getUniquePathsToRegister(Collection<? extends WorkUnitState> states) {
  Set<String> paths = Sets.newHashSet();
  for (State state : states) {
    if (state.contains(ConfigurationKeys.PUBLISHER_DIRS)) {
      paths.addAll(state.getPropAsList(ConfigurationKeys.PUBLISHER_DIRS));
    }
  }
  return paths;
}
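// Sketch (hypothetical values): ConfigurationKeys.PUBLISHER_DIRS holds a comma-separated
// list per state; duplicates across states collapse into a single set entry.
private static Set<String> examplePublisherDirs() {
  WorkUnitState wus = new WorkUnitState();
  wus.setProp(ConfigurationKeys.PUBLISHER_DIRS, "/data/out/a,/data/out/b");
  return getUniquePathsToRegister(java.util.Collections.singletonList(wus));
}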
private static void setOutputDedupeStatus(State state) {
  if (state.getPropAsBoolean(
      MRCompactor.COMPACTION_OUTPUT_DEDUPLICATED,
      MRCompactor.DEFAULT_COMPACTION_OUTPUT_DEDUPLICATED)) {
    state.setProp(SlaEventKeys.DEDUPE_STATUS_KEY, DedupeStatus.DEDUPED);
  } else {
    state.setProp(SlaEventKeys.DEDUPE_STATUS_KEY, DedupeStatus.NOT_DEDUPED);
  }
}
public BaseDataPublisher(State state) throws IOException {
  super(state);
  this.closer = Closer.create();
  Configuration conf = new Configuration();

  // Add all job configuration properties so they are picked up by Hadoop
  for (String key : this.getState().getPropertyNames()) {
    conf.set(key, this.getState().getProp(key));
  }

  this.numBranches = this.getState().getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);
  this.fileSystemByBranches = Lists.newArrayListWithCapacity(this.numBranches);
  this.publisherFinalDirOwnerGroupsByBranches = Lists.newArrayListWithCapacity(this.numBranches);
  this.permissions = Lists.newArrayListWithCapacity(this.numBranches);

  // Get a FileSystem instance for each branch
  for (int i = 0; i < this.numBranches; i++) {
    URI uri = URI.create(this.getState().getProp(
        ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.WRITER_FILE_SYSTEM_URI, this.numBranches, i),
        ConfigurationKeys.LOCAL_FS_URI));
    this.fileSystemByBranches.add(FileSystem.get(uri, conf));

    // The group(s) will be applied to the final publisher output directory(ies)
    this.publisherFinalDirOwnerGroupsByBranches.add(Optional.fromNullable(this.getState().getProp(
        ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR_GROUP, this.numBranches, i))));

    // The permission(s) will be applied to all directories created by the publisher,
    // which do NOT include directories created by the writer and moved by the publisher.
    // The permissions of those directories are controlled by writer.file.permissions and
    // writer.dir.permissions.
    this.permissions.add(new FsPermission(state.getPropAsShortWithRadix(
        ForkOperatorUtils.getPropertyNameForBranch(
            ConfigurationKeys.DATA_PUBLISHER_PERMISSIONS, numBranches, i),
        FsPermission.getDefault().toShort(),
        ConfigurationKeys.PERMISSION_PARSING_RADIX)));
  }

  this.parallelRunnerThreads = state.getPropAsInt(
      ParallelRunner.PARALLEL_RUNNER_THREADS_KEY, ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS);
}
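// Configuration sketch (all values hypothetical): a two-branch job where each branch writes
// to its own file system. Branch-scoped keys take a ".<branch>" suffix when there is more
// than one branch; branch 1's permissions fall back to the default here.
private static BaseDataPublisher examplePublisher() throws IOException {
  State state = new State();
  state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI + ".0", "hdfs://nn-a:8020");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI + ".1", "hdfs://nn-b:8020");
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_PERMISSIONS + ".0", "750");
  return new BaseDataPublisher(state);
}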
public String getDefaultEventBusId() {
  State destinationCfg = getDestination().getProperties();
  String eventBusIdKey =
      ForkOperatorUtils.getPathForBranch(destinationCfg, FULL_EVENTBUSID_KEY, getBranches(), getBranch());
  if (destinationCfg.contains(eventBusIdKey)) {
    return destinationCfg.getProp(eventBusIdKey);
  } else {
    return WriterUtils.getWriterOutputDir(destinationCfg, getBranches(), getBranch()).toString();
  }
}
@Test
public void testGetBranchName() {
  State state = new State();
  state.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".0", FORK_BRANCH_NAME_0);
  state.setProp(ConfigurationKeys.FORK_BRANCH_NAME_KEY + ".1", FORK_BRANCH_NAME_1);

  Assert.assertEquals(
      ForkOperatorUtils.getBranchName(state, 0, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 0),
      FORK_BRANCH_NAME_0);
  Assert.assertEquals(
      ForkOperatorUtils.getBranchName(state, 1, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 1),
      FORK_BRANCH_NAME_1);
  Assert.assertEquals(
      ForkOperatorUtils.getBranchName(state, 2, ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 2),
      ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + 2);
}
private static void addRuntimeHiveRegistrationProperties(State state) {
  // Use seconds instead of milliseconds to be consistent with other times stored in Hive
  state.appendToListProp(
      HiveRegProps.HIVE_TABLE_PARTITION_PROPS,
      String.format("%s:%d", DATA_PUBLISH_TIME,
          TimeUnit.SECONDS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS)));
}
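// Sketch (hypothetical helper): after the call, the partition-props list gains one entry of
// the form "<DATA_PUBLISH_TIME>:<epoch seconds>", readable back as a list property.
private static List<String> examplePartitionProps() {
  State state = new State();
  addRuntimeHiveRegistrationProperties(state);
  return state.getPropAsList(HiveRegProps.HIVE_TABLE_PARTITION_PROPS);
}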
/**
 * Initialize file system helper at most once for this instance. {@inheritDoc}
 *
 * @see gobblin.source.extractor.filebased.FileBasedSource#initFileSystemHelper(gobblin.configuration.State)
 */
@Override
public synchronized void initFileSystemHelper(State state) throws FileBasedHelperException {
  if (fsHelper == null) {
    Credential credential = GoogleCommon.newSourceCredential(state);
    Drive driveClient =
        new Drive.Builder(credential.getTransport(), GoogleCommon.getJsonFactory(), credential)
            .setApplicationName(
                Preconditions.checkNotNull(state.getProp(APPLICATION_NAME), "ApplicationName is required"))
            .build();
    this.fsHelper = closer.register(new GoogleDriveFsHelper(state, driveClient));
  }
}
/**
 * Creates a {@link gobblin.metrics.MetricContext}. Tries to read the name of the parent context
 * from the key "metrics.context.name" in the state, and tries to get the parent context by name
 * from the {@link gobblin.metrics.MetricContext} registry (the parent context must be registered).
 *
 * <p>Automatically adds two tags to the inner context:
 *
 * <ul>
 *   <li>component: attempts to determine which component type within gobblin-api generated this
 *       instance.
 *   <li>class: the specific class of the object that generated this instance of Instrumented.
 * </ul>
 */
public MetricContext getMetricContext(State state, Class<?> klazz, List<Tag<?>> tags) {
  int randomId = new Random().nextInt(Integer.MAX_VALUE);

  List<Tag<?>> generatedTags = Lists.newArrayList();
  if (!klazz.isAnonymousClass()) {
    generatedTags.add(new Tag<>("class", klazz.getCanonicalName()));
  }

  Optional<GobblinMetrics> gobblinMetrics = state.contains(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY)
      ? GobblinMetricsRegistry.getInstance().get(state.getProp(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY))
      : Optional.<GobblinMetrics>absent();

  MetricContext.Builder builder = gobblinMetrics.isPresent()
      ? gobblinMetrics.get().getMetricContext().childBuilder(klazz.getCanonicalName() + "." + randomId)
      : MetricContext.builder(klazz.getCanonicalName() + "." + randomId);
  return builder.addTags(generatedTags).addTags(tags).build();
}
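// Usage sketch (names hypothetical): attaching a custom tag and pointing the context at a
// registered parent; if "parent.context.name" is not registered, a root context is built.
private MetricContext exampleMetricContext() {
  State state = new State();
  state.setProp(ConfigurationKeys.METRIC_CONTEXT_NAME_KEY, "parent.context.name");
  List<Tag<?>> tags = Lists.<Tag<?>>newArrayList(new Tag<>("dataset", "events"));
  return getMetricContext(state, getClass(), tags);
}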
/**
 * Provides a snapshot of the current files, where the snapshot consists of a list of file IDs
 * paired with their modified times. Both the folder ID and the file ID are optional: a missing
 * folder ID means the search starts from the root folder, and a missing file ID means all files
 * in the current folder and its subfolders are included.
 *
 * <p>{@inheritDoc}
 *
 * @see gobblin.source.extractor.filebased.FileBasedSource#getcurrentFsSnapshot(gobblin.configuration.State)
 */
@Override
public List<String> getcurrentFsSnapshot(State state) {
  List<String> results = new ArrayList<>();
  String folderId = state.getProp(SOURCE_FILEBASED_DATA_DIRECTORY, "");

  try {
    LOG.info("Running ls with folderId: " + folderId);
    List<String> fileIds = this.fsHelper.ls(folderId);
    for (String fileId : fileIds) {
      results.add(fileId + splitPattern + this.fsHelper.getFileMTime(fileId));
    }
  } catch (FileBasedHelperException e) {
    throw new RuntimeException("Failed to retrieve list of file IDs for folderId: " + folderId, e);
  }
  return results;
}
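// Sketch (illustrative; assumes splitPattern is a literal separator string): each snapshot
// entry is "<fileId>" + splitPattern + "<mtime>", so the file ID can be split back out.
private static String fileIdOf(String snapshotEntry, String splitPattern) {
  return snapshotEntry.split(java.util.regex.Pattern.quote(splitPattern))[0];
}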
@BeforeClass
@SuppressWarnings("unchecked")
public void setUp() throws Exception {
  // Make the staging and/or output dirs if necessary
  File stagingDir = new File(TestConstants.TEST_STAGING_DIR);
  File outputDir = new File(TestConstants.TEST_OUTPUT_DIR);
  if (!stagingDir.exists()) {
    stagingDir.mkdirs();
  }
  if (!outputDir.exists()) {
    outputDir.mkdirs();
  }

  this.schema = new Schema.Parser().parse(TestConstants.AVRO_SCHEMA);
  this.filePath =
      TestConstants.TEST_EXTRACT_NAMESPACE.replaceAll("\\.", "/")
          + "/"
          + TestConstants.TEST_EXTRACT_TABLE
          + "/"
          + TestConstants.TEST_EXTRACT_ID
          + "_"
          + TestConstants.TEST_EXTRACT_PULL_TYPE;

  State properties = new State();
  properties.setProp(ConfigurationKeys.WRITER_BUFFER_SIZE, ConfigurationKeys.DEFAULT_BUFFER_SIZE);
  properties.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, TestConstants.TEST_FS_URI);
  properties.setProp(ConfigurationKeys.WRITER_STAGING_DIR, TestConstants.TEST_STAGING_DIR);
  properties.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, TestConstants.TEST_OUTPUT_DIR);
  properties.setProp(ConfigurationKeys.WRITER_FILE_PATH, this.filePath);
  properties.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestConstants.TEST_FILE_NAME);

  // Build a writer to write test records
  this.writer = new AvroDataWriterBuilder()
      .writeTo(Destination.of(Destination.DestinationType.HDFS, properties))
      .writeInFormat(WriterOutputFormat.AVRO)
      .withWriterId(TestConstants.TEST_WRITER_ID)
      .withSchema(this.schema)
      .forBranch(-1)
      .build();
}
public static void setUpstreamTimeStamp(State state, long time) {
  state.setProp(SlaEventKeys.UPSTREAM_TS_IN_MILLI_SECS_KEY, Long.toString(time));
}
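// Usage sketch ("jobState" and this helper are hypothetical): record when the upstream data
// became available, in milliseconds, so downstream SLA events can report end-to-end lag.
private static void exampleUpstreamTimestamp(State jobState) {
  setUpstreamTimeStamp(jobState, System.currentTimeMillis());
}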