Esempio n. 1
0
 /**
  * Walks the date partitions of {@code topic} that fall on or before {@code calendar}
  * (inclusive), newest first, and finalizes each one that has no {@code _SUCCESS} marker yet.
  *
  * <p>For every partition the method calls {@code mQuboleClient.addPartition} and, when that
  * call succeeds, touches a {@code _SUCCESS} file in the partition's log directory. A failed
  * {@code addPartition} call is logged and the walk moves on to the next (older) partition.
  * The walk stops entirely at the first partition that already has a {@code _SUCCESS} file.
  *
  * @param topic topic whose partitions are finalized
  * @param calendar upper bound (inclusive) of the partition dates to consider
  * @throws IOException propagated from the file-system helpers
  * @throws ParseException propagated from {@code getPartitions}
  * @throws InterruptedException declared for callers; propagated from the calls made here
  */
 private void finalizePartitionsUpTo(String topic, Calendar calendar)
     throws IOException, ParseException, InterruptedException {
   NavigableSet<Calendar> newestFirst =
       getPartitions(topic).headSet(calendar, true).descendingSet();
   final String s3Prefix = "s3n://" + mConfig.getS3Bucket() + "/" + mConfig.getS3Path();
   SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
   dayFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
   for (Calendar day : newestFirst) {
     String dayStr = dayFormat.format(day.getTime());
     String[] partitionSpec = {"dt=" + dayStr};
     String dir =
         new LogFilePath(
                 s3Prefix, topic, partitionSpec, mConfig.getGeneration(), 0, 0, mFileExtension)
             .getLogFileDir();
     assert FileUtil.exists(dir) : "FileUtil.exists(" + dir + ")";
     String marker = dir + "/_SUCCESS";
     if (FileUtil.exists(marker)) {
       // NOTE(review): presumably everything older was finalized by an earlier run - confirm.
       return;
     }
     boolean added;
     try {
       mQuboleClient.addPartition(mConfig.getHivePrefix() + topic, "dt='" + dayStr + "'");
       added = true;
     } catch (Exception e) {
       LOG.error("failed to finalize topic " + topic + " partition dt=" + dayStr, e);
       added = false;
     }
     if (added) {
       // Only mark the partition as done once addPartition has gone through.
       LOG.info("touching file " + marker);
       FileUtil.touch(marker);
     }
   }
 }
Esempio n. 2
0
 /**
  * Indexes every path listed recursively under the topic prefix into
  * {@code mTopicPartitionToOffsetToFiles}: first by topic partition, then by offset, with all
  * files sharing an offset collected in one set. Paths ending in {@code /_SUCCESS} are skipped.
  *
  * @throws IOException propagated from the recursive listing
  */
 private void populateTopicPartitionToOffsetToFiles() throws IOException {
   String prefix = getPrefix();
   String topicPrefix = getTopicPrefix();
   for (String path : FileUtil.listRecursively(topicPrefix)) {
     if (path.endsWith("/_SUCCESS")) {
       continue;
     }
     LogFilePath parsed = new LogFilePath(prefix, path);
     TopicPartition owner = new TopicPartition(parsed.getTopic(), parsed.getKafkaPartition());

     // Lazily create the per-partition offset index.
     SortedMap<Long, HashSet<LogFilePath>> byOffset = mTopicPartitionToOffsetToFiles.get(owner);
     if (byOffset == null) {
       byOffset = new TreeMap<Long, HashSet<LogFilePath>>();
       mTopicPartitionToOffsetToFiles.put(owner, byOffset);
     }

     // Lazily create the bucket of files that share this offset.
     long offset = parsed.getOffset();
     HashSet<LogFilePath> bucket = byOffset.get(offset);
     if (bucket == null) {
       bucket = new HashSet<LogFilePath>();
       byOffset.put(offset, bucket);
     }
     bucket.add(parsed);
   }
 }
Esempio n. 3
0
 /**
  * Counts the key/value records in the sequence file located at
  * {@code logFilePath.getLogFilePath()}.
  *
  * @param logFilePath describes the file to scan
  * @return number of records read from the file
  * @throws Exception if the file cannot be opened or read, or if the key/value holder objects
  *     cannot be instantiated
  */
 private int getMessageCount(LogFilePath logFilePath) throws Exception {
   String path = logFilePath.getLogFilePath();
   Path fsPath = new Path(path);
   FileSystem fileSystem = FileUtil.getFileSystem(path);
   SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
   // Close the reader on every exit path; previously a failure while reading leaked it.
   try {
     LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
     BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
     int result = 0;
     while (reader.next(key, value)) {
       result++;
     }
     return result;
   } finally {
     reader.close();
   }
 }
Esempio n. 4
0
 /**
  * Reads every record key from the sequence file located at
  * {@code logFilePath.getLogFilePath()} and adds it to {@code offsets}.
  *
  * @param logFilePath describes the file to scan
  * @param offsets set that receives the keys; it is modified in place and may already contain
  *     entries supplied by the caller
  * @throws RuntimeException if a key read from the file is already present in {@code offsets}
  * @throws Exception if the file cannot be opened or read, or if the key/value holder objects
  *     cannot be instantiated
  */
 private void getOffsets(LogFilePath logFilePath, Set<Long> offsets) throws Exception {
   String path = logFilePath.getLogFilePath();
   Path fsPath = new Path(path);
   FileSystem fileSystem = FileUtil.getFileSystem(path);
   SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
   // Close the reader on every exit path; previously the duplicate-key exception (and any
   // read failure) left it open.
   try {
     LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
     BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
     while (reader.next(key, value)) {
       if (!offsets.add(key.get())) {
         throw new RuntimeException(
             "duplicate key " + key.get() + " found in file " + logFilePath.getLogFilePath());
       }
     }
   } finally {
     reader.close();
   }
 }
Esempio n. 5
0
 /**
  * Collects the date partitions that exist for {@code topic}.
  *
  * <p>The method lists the directory reported by {@code getLogFileParentDir()} for a log file
  * path built with the partition {@code "dt="}, keeps the entries whose path ends in
  * {@code /dt=YYYY-MM-DD}, and parses each date as UTC.
  *
  * @param topic topic whose partitions are listed
  * @return the parsed partition dates as UTC calendars, in ascending order; empty if no entry
  *     matches
  * @throws IOException propagated from the directory listing
  * @throws ParseException if a matched date string cannot be parsed
  */
 private NavigableSet<Calendar> getPartitions(String topic) throws IOException, ParseException {
   final String s3Prefix = "s3n://" + mConfig.getS3Bucket() + "/" + mConfig.getS3Path();
   String[] partitions = {"dt="};
   LogFilePath logFilePath =
       new LogFilePath(s3Prefix, topic, partitions, mConfig.getGeneration(), 0, 0, mFileExtension);
   String parentDir = logFilePath.getLogFileParentDir();
   String[] partitionDirs = FileUtil.list(parentDir);
   Pattern pattern = Pattern.compile(".*/dt=(\\d\\d\\d\\d-\\d\\d-\\d\\d)$");
   // The formatter and time zone do not depend on the entry being parsed, so build them once
   // instead of once per directory. The formatter stays local to this call.
   TimeZone utc = TimeZone.getTimeZone("UTC");
   SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
   format.setTimeZone(utc);
   TreeSet<Calendar> result = new TreeSet<Calendar>();
   for (String partitionDir : partitionDirs) {
     Matcher matcher = pattern.matcher(partitionDir);
     if (matcher.find()) {
       String date = matcher.group(1);
       Calendar calendar = Calendar.getInstance(utc);
       calendar.setTime(format.parse(date));
       result.add(calendar);
     }
   }
   return result;
 }