/**
 * Finalizes the daily partitions of a topic that are dated at or before the given cutoff.
 *
 * <p>Partitions are visited newest first. For each one, the Hive partition is registered
 * through the Qubole client and a {@code _SUCCESS} marker file is then touched in the
 * partition directory. The walk stops at the first partition that already has a marker.
 * NOTE(review): this presumably relies on older partitions having been finalized by an
 * earlier run - confirm.
 *
 * <p>A failure to register one partition is logged and the walk moves on to the next
 * (older) partition without writing a marker, so the failed partition is picked up again
 * on a later run.
 *
 * @param topic name of the topic whose partitions are finalized
 * @param calendar inclusive upper bound on the partition date
 * @throws IOException if a file system operation fails
 * @throws ParseException if a partition directory name cannot be parsed as a date
 * @throws InterruptedException if the thread is interrupted while a partition is being added
 */
private void finalizePartitionsUpTo(String topic, Calendar calendar) throws IOException,
        ParseException, InterruptedException {
    NavigableSet<Calendar> partitionDates =
        getPartitions(topic).headSet(calendar, true).descendingSet();
    final String s3Prefix = "s3n://" + mConfig.getS3Bucket() + "/" + mConfig.getS3Path();
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("UTC"));
    for (Calendar partition : partitionDates) {
        String partitionStr = format.format(partition.getTime());
        String[] partitions = {"dt=" + partitionStr};
        LogFilePath logFilePath = new LogFilePath(s3Prefix, topic, partitions,
            mConfig.getGeneration(), 0, 0, mFileExtension);
        String logFileDir = logFilePath.getLogFileDir();
        assert FileUtil.exists(logFileDir) : "FileUtil.exists(" + logFileDir + ")";
        String successFilePath = logFileDir + "/_SUCCESS";
        if (FileUtil.exists(successFilePath)) {
            // Marker already present: nothing older is processed in this run.
            return;
        }
        try {
            mQuboleClient.addPartition(mConfig.getHivePrefix() + topic,
                "dt='" + partitionStr + "'");
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // An interrupt asks this thread to stop; it is not a per-partition failure.
                // Propagate it instead of logging it and carrying on with older partitions.
                throw (InterruptedException) e;
            }
            LOG.error("failed to finalize topic " + topic + " partition dt=" + partitionStr, e);
            continue;
        }
        LOG.info("touching file " + successFilePath);
        FileUtil.touch(successFilePath);
    }
}
/**
 * Obtains the Hadoop {@link FileSystem} responsible for the given path.
 *
 * <p>When a configuration object is available, its AWS access key and secret key are copied
 * into the {@code fs.s3n.*} properties of a freshly created Hadoop configuration before the
 * file system is looked up.
 *
 * @param path location, in URI syntax, whose file system is requested
 * @return the file system resolved for {@code path}
 * @throws IOException if the file system lookup fails
 */
public static FileSystem getFileSystem(String path) throws IOException {
    Configuration hadoopConf = new Configuration();
    if (mConfig != null) {
        String accessKey = mConfig.getAwsAccessKey();
        hadoopConf.set("fs.s3n.awsAccessKeyId", accessKey);
        String secretKey = mConfig.getAwsSecretKey();
        hadoopConf.set("fs.s3n.awsSecretAccessKey", secretKey);
    }
    URI location = URI.create(path);
    return FileSystem.get(location, hadoopConf);
}
/**
 * Creates a finalizer wired to the collaborators it needs: a Kafka client, a Zookeeper
 * connector, a timestamped message parser instantiated reflectively from the configured
 * parser class, and a Qubole client.
 *
 * <p>The file extension is the default extension of the configured compression codec, or an
 * empty string when no codec is configured.
 *
 * @param config configuration shared by all collaborators
 * @throws Exception if any collaborator or the compression codec cannot be created
 */
public PartitionFinalizer(SecorConfig config) throws Exception {
    mConfig = config;
    mKafkaClient = new KafkaClient(config);
    mZookeeperConnector = new ZookeeperConnector(config);
    mMessageParser = (TimestampedMessageParser) ReflectionUtil.createMessageParser(
        config.getMessageParserClass(), config);
    mQuboleClient = new QuboleClient(config);

    if (config.getCompressionCodec() == null || config.getCompressionCodec().isEmpty()) {
        // No codec configured: files carry no compression extension.
        mFileExtension = "";
    } else {
        CompressionCodec compressionCodec =
            CompressionUtil.createCompressionCodec(config.getCompressionCodec());
        mFileExtension = compressionCodec.getDefaultExtension();
    }
}
/**
 * Builds the parser under test and four MessagePack-encoded fixture messages whose
 * {@code ts} field is stored as an int, a long, a double and a string respectively.
 *
 * @throws Exception if a payload cannot be serialized
 */
@Override
public void setUp() throws Exception {
    // Mocked configuration: the timestamp lives in field "ts" and the time zone is UTC.
    SecorConfig config = Mockito.mock(SecorConfig.class);
    Mockito.when(config.getMessageTimestampName()).thenReturn("ts");
    Mockito.when(config.getTimeZone()).thenReturn(TimeZone.getTimeZone("UTC"));
    mMessagePackParser = new MessagePackParser(config);
    mObjectMapper = new ObjectMapper(new MessagePackFactory());

    // Timestamp as an int literal; no other fields.
    HashMap<String, Object> secondsPayload = new HashMap<String, Object>();
    secondsPayload.put("ts", 1405970352);
    byte[] secondsBytes = mObjectMapper.writeValueAsBytes(secondsPayload);
    mMessageWithSecondsTimestamp = new Message("test", 0, 0, null, secondsBytes);

    // Timestamp as a long literal, plus a few unrelated fields.
    HashMap<String, Object> millisPayload = new HashMap<String, Object>();
    millisPayload.put("ts", 1405970352123L);
    millisPayload.put("isActive", true);
    millisPayload.put("email", "*****@*****.**");
    millisPayload.put("age", 27);
    byte[] millisBytes = mObjectMapper.writeValueAsBytes(millisPayload);
    mMessageWithMillisTimestamp = new Message("test", 0, 0, null, millisBytes);

    // Timestamp as a floating-point literal.
    HashMap<String, Object> millisFloatPayload = new HashMap<String, Object>();
    millisFloatPayload.put("ts", 1405970352123.0);
    millisFloatPayload.put("isActive", false);
    millisFloatPayload.put("email", "*****@*****.**");
    millisFloatPayload.put("age", 35);
    byte[] millisFloatBytes = mObjectMapper.writeValueAsBytes(millisFloatPayload);
    mMessageWithMillisFloatTimestamp = new Message("test", 0, 0, null, millisFloatBytes);

    // Timestamp as a string; "isActive" is deliberately stored as null.
    HashMap<String, Object> millisStringPayload = new HashMap<String, Object>();
    millisStringPayload.put("ts", "1405970352123");
    millisStringPayload.put("isActive", null);
    millisStringPayload.put("email", "*****@*****.**");
    millisStringPayload.put("age", 67);
    byte[] millisStringBytes = mObjectMapper.writeValueAsBytes(millisStringPayload);
    mMessageWithMillisStringTimestamp = new Message("test", 0, 0, null, millisStringBytes);
}
/**
 * Lists the daily partitions that exist for a topic.
 *
 * <p>The parent directory of the topic's log files is listed, and every entry whose path
 * ends in {@code /dt=yyyy-MM-dd} is parsed as a UTC date. Entries that do not match that
 * shape are ignored.
 *
 * @param topic name of the topic whose partition directories are listed
 * @return the partition dates as UTC calendars, in ascending order
 * @throws IOException if the directory listing fails
 * @throws ParseException if a matched date string cannot be parsed
 */
private NavigableSet<Calendar> getPartitions(String topic) throws IOException, ParseException {
    final String s3Prefix = "s3n://" + mConfig.getS3Bucket() + "/" + mConfig.getS3Path();
    String[] partitions = {"dt="};
    LogFilePath logFilePath = new LogFilePath(s3Prefix, topic, partitions,
        mConfig.getGeneration(), 0, 0, mFileExtension);
    String parentDir = logFilePath.getLogFileParentDir();
    String[] partitionDirs = FileUtil.list(parentDir);
    Pattern pattern = Pattern.compile(".*/dt=(\\d\\d\\d\\d-\\d\\d-\\d\\d)$");
    // The formatter does not depend on the directory being examined, so build it once.
    // It never leaves this method, which keeps the non-thread-safe SimpleDateFormat confined.
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("UTC"));
    TreeSet<Calendar> result = new TreeSet<Calendar>();
    for (String partitionDir : partitionDirs) {
        Matcher matcher = pattern.matcher(partitionDir);
        if (matcher.find()) {
            String date = matcher.group(1);
            // Each calendar gets its own TimeZone instance because it is handed to callers.
            Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            calendar.setTime(format.parse(date));
            result.add(calendar);
        }
    }
    return result;
}
public void finalizePartitions() throws Exception { List<String> topics = mZookeeperConnector.getCommittedOffsetTopics(); for (String topic : topics) { if (!topic.matches(mConfig.getKafkaTopicFilter())) { LOG.info("skipping topic " + topic); } else { LOG.info("finalizing topic " + topic); long finalizedTimestampMillis = getFinalizedTimestampMillis(topic); LOG.info("finalized timestamp for topic " + topic + " is " + finalizedTimestampMillis); if (finalizedTimestampMillis != -1) { Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC")); calendar.setTimeInMillis(finalizedTimestampMillis); // Introduce a lag of one day and one hour. calendar.add(Calendar.HOUR, -1); calendar.add(Calendar.DAY_OF_MONTH, -1); finalizePartitionsUpTo(topic, calendar); } } } }
/**
 * Creates a client that keeps the Qubole API token read from the supplied configuration.
 *
 * @param config configuration providing the Qubole API token
 */
public QuboleClient(SecorConfig config) {
    this.mApiToken = config.getQuboleApiToken();
}
/**
 * Builds the {@code s3n://} location made of the configured S3 bucket followed by the
 * configured S3 path.
 *
 * @return the prefix in the form {@code s3n://<bucket>/<path>}
 */
private String getPrefix() {
    StringBuilder prefix = new StringBuilder();
    prefix.append("s3n://");
    prefix.append(mConfig.getS3Bucket());
    prefix.append("/");
    prefix.append(mConfig.getS3Path());
    return prefix.toString();
}