@Test
public void writeToString() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] {9, 7, 10}, 9, 37);
  Assert.assertEquals("37:9:7:9:10", txns.writeToString());
  txns = new ValidCompactorTxnList();
  Assert.assertEquals(Long.toString(Long.MAX_VALUE) + ":-1:", txns.writeToString());
  txns = new ValidCompactorTxnList(new long[0], -1, 23);
  Assert.assertEquals("23:-1:", txns.writeToString());
}
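As a companion to the test above, here is a minimal round-trip sketch (not part of the original test class). It assumes the ValidTxnList interface also exposes readFromString() as the counterpart of the writeToString() method exercised above.

// Round-trip sketch, under the assumption that readFromString() mirrors writeToString().
ValidTxnList original = new ValidCompactorTxnList(new long[] {9, 7, 10}, 9, 37);
String serialized = original.writeToString();   // "37:9:7:9:10" per the test above

ValidTxnList restored = new ValidCompactorTxnList();
restored.readFromString(serialized);            // rebuild the list from its string form

// The restored list should serialize back to the same string.
Assert.assertEquals(serialized, restored.writeToString());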
@Override
public boolean next(RecordIdentifier recordIdentifier, OrcStruct prev) throws IOException {
  boolean keysSame = true;
  while (keysSame && primary != null) {

    // The primary's nextRecord is the next value to return
    OrcStruct current = primary.nextRecord;
    recordIdentifier.set(primary.key);

    // Advance the primary reader to the next record
    primary.next(extraValue);

    // Save the current record as the new extraValue for next time so that
    // we minimize allocations
    extraValue = current;

    // now that the primary reader has advanced, we need to see if we
    // continue to read it or move to the secondary.
    if (primary.nextRecord == null ||
        primary.key.compareTo(secondaryKey) > 0) {

      // if the primary isn't done, push it back into the readers
      if (primary.nextRecord != null) {
        readers.put(primary.key, primary);
      }

      // update primary and secondaryKey
      Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry();
      if (entry != null) {
        primary = entry.getValue();
        if (readers.isEmpty()) {
          secondaryKey = null;
        } else {
          secondaryKey = readers.firstKey();
        }
      } else {
        primary = null;
      }
    }

    // if this transaction isn't ok, skip over it
    if (!validTxnList.isTxnValid(
        ((ReaderKey) recordIdentifier).getCurrentTransactionId())) {
      continue;
    }

    // if we are collapsing, figure out if this is a new row
    if (collapse) {
      keysSame = prevKey.compareRow(recordIdentifier) == 0;
      if (!keysSame) {
        prevKey.set(recordIdentifier);
      }
    } else {
      keysSame = false;
    }

    // set the output record by fiddling with the pointers so that we can
    // avoid a copy.
    prev.linkFields(current);
  }
  return !keysSame;
}
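The primary/secondary bookkeeping above is essentially a k-way merge driven by a sorted map. The standalone sketch below uses simplified, hypothetical types (plain iterators of integers, not Hive's ReaderPair/ReaderKey) and assumes keys are unique across all sources. It also omits the optimization in the real method, where the current primary stays out of the map until its next key falls behind readers.firstKey() (the secondaryKey); the sketch simply re-inserts after every record.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// Simplified illustration of the pollFirstEntry() merge pattern used in next() above.
class MergeSketch {
  static List<Integer> merge(List<Iterator<Integer>> sources) {
    TreeMap<Integer, Iterator<Integer>> readers = new TreeMap<>();
    for (Iterator<Integer> src : sources) {
      if (src.hasNext()) {
        readers.put(src.next(), src);       // key = the source's next value
      }
    }
    List<Integer> out = new ArrayList<>();
    Map.Entry<Integer, Iterator<Integer>> primary = readers.pollFirstEntry();
    while (primary != null) {
      out.add(primary.getKey());            // emit the smallest outstanding key
      Iterator<Integer> src = primary.getValue();
      if (src.hasNext()) {
        readers.put(src.next(), src);       // push the advanced source back in
      }
      primary = readers.pollFirstEntry();   // cheapest source becomes the new primary
    }
    return out;
  }
}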
// todo: this doesn't check if compaction is already running (even though Initiator does, we
// don't go through Initiator for user-initiated compactions)
@Override
public void run() {
  do {
    boolean launchedJob = false;
    // Make sure nothing escapes this run method and kills the metastore at large,
    // so wrap it in a big catch Throwable statement.
    try {
      final CompactionInfo ci = txnHandler.findNextToCompact(name);

      if (ci == null && !stop.get()) {
        try {
          Thread.sleep(SLEEP_TIME);
          continue;
        } catch (InterruptedException e) {
          LOG.warn("Worker thread sleep interrupted " + e.getMessage());
          continue;
        }
      }

      // Find the table we will be working with.
      Table t1 = null;
      try {
        t1 = resolveTable(ci);
        if (t1 == null) {
          LOG.info("Unable to find table " + ci.getFullTableName() +
              ", assuming it was dropped and moving on.");
          txnHandler.markCleaned(ci);
          continue;
        }
      } catch (MetaException e) {
        txnHandler.markCleaned(ci);
        continue;
      }
      // This chicanery is to get around the fact that the table needs to be final in order to
      // go into the doAs below.
      final Table t = t1;

      // Find the partition we will be working with, if there is one.
      Partition p = null;
      try {
        p = resolvePartition(ci);
        if (p == null && ci.partName != null) {
          LOG.info("Unable to find partition " + ci.getFullPartitionName() +
              ", assuming it was dropped and moving on.");
          txnHandler.markCleaned(ci);
          continue;
        }
      } catch (Exception e) {
        txnHandler.markCleaned(ci);
        continue;
      }

      // Find the appropriate storage descriptor
      final StorageDescriptor sd = resolveStorageDescriptor(t, p);

      // Check that the table or partition isn't sorted, as we don't yet support that.
      if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) {
        LOG.error("Attempt to compact sorted table, which is not yet supported!");
        txnHandler.markCleaned(ci);
        continue;
      }

      final boolean isMajor = ci.isMajorCompaction();
      final ValidTxnList txns =
          CompactionTxnHandler.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
      LOG.debug("ValidCompactTxnList: " + txns.writeToString());
      txnHandler.setCompactionHighestTxnId(ci, txns.getHighWatermark());
      final StringBuilder jobName = new StringBuilder(name);
      jobName.append("-compactor-");
      jobName.append(ci.getFullPartitionName());

      // Determine who to run as
      String runAs;
      if (ci.runAs == null) {
        runAs = findUserToRunAs(sd.getLocation(), t);
        txnHandler.setRunAs(ci.id, runAs);
      } else {
        runAs = ci.runAs;
      }

      LOG.info("Starting " + ci.type.toString() + " compaction for " +
          ci.getFullPartitionName());

      final StatsUpdater su = StatsUpdater.init(ci, txnHandler.findColumnsWithStats(ci), conf,
          runJobAsSelf(runAs) ? runAs : t.getOwner());
      final CompactorMR mr = new CompactorMR();
      launchedJob = true;
      try {
        if (runJobAsSelf(runAs)) {
          mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
        } else {
          UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(),
              UserGroupInformation.getLoginUser());
          ugi.doAs(new PrivilegedExceptionAction<Object>() {
            @Override
            public Object run() throws Exception {
              mr.run(conf, jobName.toString(), t, sd, txns, ci, su);
              return null;
            }
          });
        }
        txnHandler.markCompacted(ci);
      } catch (Exception e) {
        LOG.error("Caught exception while trying to compact " + ci +
            ".  Marking failed to avoid repeated failures, " +
            StringUtils.stringifyException(e));
        txnHandler.markFailed(ci);
      }
    } catch (Throwable t) {
      LOG.error("Caught an exception in the main loop of compactor worker " + name + ", " +
          StringUtils.stringifyException(t));
    }

    // If we didn't try to launch a job it either means there was no work to do or we got
    // here as the result of a communication failure with the DB.  Either way we want to wait
    // a bit before we restart the loop.
    if (!launchedJob && !stop.get()) {
      try {
        Thread.sleep(SLEEP_TIME);
      } catch (InterruptedException e) {
      }
    }
  } while (!stop.get());
}
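The impersonation step above uses Hadoop's proxy-user mechanism. The following is a minimal, standalone sketch of that pattern with a generic action rather than the compactor's mr.run call; it assumes the usual Hadoop proxy-user configuration (hadoop.proxyuser.<service>.*) is in place for the service account.

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.security.UserGroupInformation;

// Proxy-user sketch: run an arbitrary action as tableOwner while authenticating
// as the service's own login user.
class DoAsSketch {
  static <T> T runAs(String tableOwner, PrivilegedExceptionAction<T> action) throws Exception {
    UserGroupInformation ugi =
        UserGroupInformation.createProxyUser(tableOwner, UserGroupInformation.getLoginUser());
    return ugi.doAs(action);
  }
}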
/**
 * Run a compactor job.
 *
 * @param conf Hive configuration
 * @param jobName name to run this job with
 * @param t metastore table
 * @param sd metastore storage descriptor
 * @param txns list of valid transactions
 * @param isMajor is this a major compaction?
 * @param su stats updater, used to gather column stats once the job completes
 * @throws java.io.IOException if the job fails
 */
void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, ValidTxnList txns,
         boolean isMajor, Worker.StatsUpdater su) throws IOException {
  JobConf job = new JobConf(conf);
  job.setJobName(jobName);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setJarByClass(CompactorMR.class);
  LOG.debug("User jar set to " + job.getJar());
  job.setMapperClass(CompactorMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormat(CompactorInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setOutputCommitter(CompactorOutputCommitter.class);

  String queueName = conf.getVar(HiveConf.ConfVars.COMPACTOR_JOB_QUEUE);
  if (queueName != null && queueName.length() > 0) {
    job.setQueueName(queueName);
  }

  job.set(FINAL_LOCATION, sd.getLocation());
  job.set(TMP_LOCATION, sd.getLocation() + "/" + TMPDIR + "_" + UUID.randomUUID().toString());
  job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
  job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
  job.setBoolean(IS_MAJOR, isMajor);
  job.setBoolean(IS_COMPRESSED, sd.isCompressed());
  job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
  job.setInt(NUM_BUCKETS, sd.getNumBuckets());
  job.set(ValidTxnList.VALID_TXNS_KEY, txns.writeToString());
  setColumnTypes(job, sd.getCols());

  // Figure out and encode what files we need to read.  We do this here (rather than in
  // getSplits below) because as part of this we discover our minimum and maximum transactions,
  // and discovering that in getSplits is too late as we then have no way to pass it to our
  // mapper.

  AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, false);
  StringableList dirsToSearch = new StringableList();
  Path baseDir = null;
  if (isMajor) {
    // There may not be a base dir if the partition was empty before inserts or if this
    // partition is just now being converted to ACID.
    baseDir = dir.getBaseDirectory();
    if (baseDir == null) {
      List<HdfsFileStatusWithId> originalFiles = dir.getOriginalFiles();
      if (originalFiles != null && originalFiles.size() > 0) {
        // There are original format files
        for (HdfsFileStatusWithId stat : originalFiles) {
          Path path = stat.getFileStatus().getPath();
          dirsToSearch.add(path);
          LOG.debug("Adding original file " + path + " to dirs to search");
        }
        // Set base to the location so that the input format reads the original files.
        baseDir = new Path(sd.getLocation());
      }
    } else {
      // add our base to the list of directories to search for files in.
      LOG.debug("Adding base directory " + baseDir + " to dirs to search");
      dirsToSearch.add(baseDir);
    }
  }

  List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();

  if (parsedDeltas == null || parsedDeltas.size() == 0) {
    // Seriously, no deltas?  Can't compact that.
    LOG.error("No delta files found to compact in " + sd.getLocation());
    return;
  }

  StringableList deltaDirs = new StringableList();
  long minTxn = Long.MAX_VALUE;
  long maxTxn = Long.MIN_VALUE;
  for (AcidUtils.ParsedDelta delta : parsedDeltas) {
    LOG.debug("Adding delta " + delta.getPath() + " to directories to search");
    dirsToSearch.add(delta.getPath());
    deltaDirs.add(delta.getPath());
    minTxn = Math.min(minTxn, delta.getMinTransaction());
    maxTxn = Math.max(maxTxn, delta.getMaxTransaction());
  }

  if (baseDir != null) job.set(BASE_DIR, baseDir.toString());
  job.set(DELTA_DIRS, deltaDirs.toString());
  job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
  job.setLong(MIN_TXN, minTxn);
  job.setLong(MAX_TXN, maxTxn);
  LOG.debug("Setting minimum transaction to " + minTxn);
  LOG.debug("Setting maximum transaction to " + maxTxn);

  RunningJob rj = JobClient.runJob(job);
  LOG.info("Submitted " + (isMajor ? CompactionType.MAJOR : CompactionType.MINOR) +
      " compaction job '" + jobName + "' with jobID=" + rj.getID() + " to " +
      job.getQueueName() + " queue. " +
      "(current delta dirs count=" + dir.getCurrentDirectories().size() +
      ", obsolete delta dirs count=" + dir.getObsolete().size() + ")");
  rj.waitForCompletion();
  su.gatherStats();
}
@Test
public void exceptionsInMidst() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] {8}, 8, 15);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
}

@Test
public void maxTxnLowNoExceptions() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[0], -1, 15);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
}

@Test
public void minTxnHighNoExceptions() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[0], -1, 5);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.NONE, rsp);
}

@Test
public void maxTxnLow() {
  ValidTxnList txns = new ValidCompactorTxnList(new long[] {13, 14}, 13, 15);
  ValidTxnList.RangeResponse rsp = txns.isTxnRangeValid(7, 9);
  Assert.assertEquals(ValidTxnList.RangeResponse.ALL, rsp);
}
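The range results above follow from per-transaction validity. The sketch below reuses the data from maxTxnLow() and relies only on isTxnValid() from the ValidTxnList interface; it assumes the middle constructor argument is the minimum open transaction id, as suggested by the ":-1:" placeholder in the writeToString() test.

// Point checks (sketch only, not part of the original test class) that explain
// why isTxnRangeValid(7, 9) reports ALL above: every transaction in the range
// lies below 13, the assumed minimum open transaction id, and none is an exception.
ValidTxnList txns = new ValidCompactorTxnList(new long[] {13, 14}, 13, 15);
Assert.assertTrue(txns.isTxnValid(7));
Assert.assertTrue(txns.isTxnValid(8));
Assert.assertTrue(txns.isTxnValid(9));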