Esempio n. 1
0
  /**
   * Collects the local jar files that have to be shipped with the given plan.
   *
   * <p>The classes named by {@code dag.getClassNames()} are loaded through the context class
   * loader, expanded with their super classes (excluding {@code Object}) and the interfaces each
   * of those declares directly, and combined with {@code defaultClasses}. Every class that has a
   * code source is mapped to a jar via {@code JarFinder.getJar}; the lookup is cached per code
   * source location. Finally the entries of the {@code LogicalPlan.LIBRARY_JARS} value, split on
   * {@code LIB_JARS_SEP}, are appended.
   *
   * @param dag plan supplying the class names, the debug flag and the library jar setting
   * @param defaultClasses additional classes whose jars are always included
   * @return jar paths in discovery order, without duplicates
   * @throws IllegalArgumentException if a class named by the plan cannot be loaded
   * @throws AssertionError if {@code JarFinder.getJar} yields no jar for a class
   */
  public static LinkedHashSet<String> findJars(LogicalPlan dag, Class<?>[] defaultClasses) {
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    final List<Class<?>> candidates = new ArrayList<Class<?>>();

    // Step 1: the classes the plan refers to by name.
    for (String name : dag.getClassNames()) {
      try {
        candidates.add(loader.loadClass(name));
      } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Failed to load class " + name, e);
      }
    }

    // Step 2: walk up from each named class; the super classes need no deploy annotation.
    // Only the classes present before this step are used as starting points.
    final int namedCount = candidates.size();
    for (int i = 0; i < namedCount; i++) {
      Class<?> current = candidates.get(i);
      while (current != null && current != Object.class) {
        candidates.add(current);
        candidates.addAll(Arrays.asList(current.getInterfaces()));
        current = current.getSuperclass();
      }
    }

    // Step 3: classes the caller always wants deployed.
    candidates.addAll(Arrays.asList(defaultClasses));

    if (dag.isDebug()) {
      LOG.debug("Deploy dependencies: {}", candidates);
    }

    // Step 4: translate classes into jar files; the set drops duplicates, keeps order.
    final LinkedHashSet<String> jarFiles = new LinkedHashSet<String>();
    final HashMap<String, String> jarBySource = new HashMap<String, String>();

    for (Class<?> candidate : candidates) {
      if (candidate.getProtectionDomain().getCodeSource() != null) {
        final String source =
            candidate.getProtectionDomain().getCodeSource().getLocation().toString();
        String jar = jarBySource.get(source);
        if (jar == null) {
          // First time this location is seen: resolve it once so that a folder is not
          // packaged into a jar repeatedly.
          jar = JarFinder.getJar(candidate);
          jarBySource.put(source, jar);
          LOG.debug("added sourceLocation {} as {}", source, jar);
          if (jar == null) {
            throw new AssertionError("Cannot resolve jar file for " + candidate);
          }
        }
        jarFiles.add(jar);
      }
      // else: no code source (system class), nothing to ship
    }

    // Step 5: jars configured explicitly on the plan.
    final String libJarsPath = dag.getValue(LogicalPlan.LIBRARY_JARS);
    if (!StringUtils.isEmpty(libJarsPath)) {
      for (String libJar : StringUtils.splitByWholeSeparator(libJarsPath, LIB_JARS_SEP)) {
        jarFiles.add(libJar);
      }
    }

    LOG.info("Local jar file dependencies: " + jarFiles);

    return jarFiles;
  }
  /**
   * Prints the file entries recorded for one pipeline run.
   *
   * <p>The row whose key is the {@code id} configuration value is read from the {@code
   * gestore_runs} table. Each qualifier in column family {@code d} is split into a file id and a
   * field name: the suffixes {@code _db_timestamp}, {@code _filename} and {@code _regex} select
   * the field of that name, any other qualifier is stored as field {@code type}. The grouped
   * fields are written to standard output.
   *
   * @param args command line arguments (not used by this method)
   * @return 0 on success, 1 if the {@code id} setting is missing or the run row has no data
   * @throws Exception if HBase access fails
   */
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // Fail with a clear message instead of a NullPointerException on a missing id.
    String runId = argConf.get("id");
    if (runId == null) {
      System.err.println("ERROR: missing required configuration value 'id'");
      return 1;
    }

    Configuration config = HBaseConfiguration.create();
    // NOTE(review): hbAdmin and db_util are never used below. They are still constructed
    // because their constructors may have side effects this view cannot rule out - confirm
    // and remove if they are not needed.
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    HTable runTable = new HTable(config, "gestore_runs");
    try {
      Result pipeline = runTable.get(new Get(runId.getBytes()));

      // getFamilyMap yields null when the row holds no data (e.g. unknown id).
      NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());
      if (pipeMap == null) {
        System.err.println("ERROR: no run data found for id " + runId);
        return 1;
      }

      // Suffixes are checked in this order; the first match wins.
      final String[] suffixFields = {"db_timestamp", "filename", "regex"};

      for (Map.Entry<byte[], byte[]> cell : pipeMap.entrySet()) {
        String resultKey = new String(cell.getKey());
        String resultValue = new String(cell.getValue());

        // Qualifiers without a known suffix describe the entry's type.
        String field = "type";
        String entry = resultKey;
        for (String candidate : suffixFields) {
          String suffix = "_" + candidate;
          if (resultKey.endsWith(suffix)) {
            field = candidate;
            entry = resultKey.substring(0, resultKey.length() - suffix.length());
            break;
          }
        }

        HashMap<String, String> fields = resultMap.get(entry);
        if (fields == null) {
          fields = new HashMap<String, String>();
          resultMap.put(entry, fields);
        }
        fields.put(field, resultValue);
      }
    } finally {
      // Release the table's resources even when the read fails.
      runTable.close();
    }

    for (Map.Entry<String, HashMap<String, String>> file : resultMap.entrySet()) {
      System.out.println("File ID: " + file.getKey());
      for (Map.Entry<String, String> fieldEntry : file.getValue().entrySet()) {
        System.out.format("  %1$-20s  %2$s\n", fieldEntry.getKey(), fieldEntry.getValue());
      }
    }

    return 0;
  }
  /**
   * Drops a block that is still being created, together with its temporary file (if any).
   *
   * <p>Does nothing when the block has no entry in {@code ongoingCreates}. Otherwise the block is
   * removed from {@code ongoingCreates} and {@code volumeMap}, and its temporary block file and
   * meta file are handed to {@code delBlockFromDisk}.
   *
   * @param b block to discard
   * @throws IOException declared for the helpers called here
   */
  public synchronized void unfinalizeBlock(Block b) throws IOException {
    // forget the block in the in-memory structures first
    final ActiveFile pending = ongoingCreates.remove(b);
    if (pending != null) {
      volumeMap.remove(b);

      // then get rid of the temp file and its meta file on disk
      final File tmpBlockFile = pending.file;
      final boolean deleted = delBlockFromDisk(tmpBlockFile, getMetaFile(tmpBlockFile, b), b);
      if (deleted) {
        DataNode.LOG.warn("Block " + b + " unfinalized and removed. ");
      }
    }
  }
 /**
  * Looks up the file recorded for a block.
  *
  * @param b block identifier
  * @return the file stored in the block's {@code volumeMap} entry, or {@code null} when the block
  *     has no entry
  */
 public synchronized File getFile(Block b) {
   final DatanodeBlockInfo entry = volumeMap.get(b);
   return (entry == null) ? null : entry.getFile();
 }
  /**
   * Make a copy of the block if this block is linked to an existing snapshot. This ensures that
   * modifying this block does not modify data in any existing snapshots.
   *
   * <p>Only the {@code volumeMap} lookup is done while holding this object's monitor; the detach
   * itself is delegated to the block's {@code DatanodeBlockInfo} outside of it.
   *
   * @param block Block
   * @param numLinks Detach if the number of links exceed this value
   * @throws IOException if the block has no entry in {@code volumeMap}, or if the delegated
   *     detach fails
   * @return - true if the specified block was detached
   */
  public boolean detachBlock(Block block, int numLinks) throws IOException {
    final DatanodeBlockInfo info;

    synchronized (this) {
      info = volumeMap.get(block);
    }
    // An unknown block used to surface as a bare NullPointerException; report it properly.
    if (info == null) {
      throw new IOException("Block " + block + " does not exist in volumeMap.");
    }
    return info.detachBlock(block, numLinks);
  }
 /**
  * Creates the temporary file for a block on the given volume.
  *
  * @param vol volume to create the file on; when {@code null} the volume recorded for the block
  *     in {@code volumeMap} is used
  * @param blk block the temporary file is for
  * @return the file returned by the volume's {@code createTmpFile}
  * @throws IOException if no volume was given and none is recorded for the block, or if the
  *     volume fails to create the file
  */
 synchronized File createTmpFile(FSVolume vol, Block blk) throws IOException {
   if (vol == null) {
     // A block without a volumeMap entry must end in the IOException below rather than in
     // a NullPointerException, so only dereference the entry when there is one.
     if (volumeMap.get(blk) != null) {
       vol = volumeMap.get(blk).getVolume();
     }
     if (vol == null) {
       throw new IOException("Could not find volume for block " + blk);
     }
   }
   return vol.createTmpFile(blk);
 }
  /**
   * Complete the block write!
   *
   * <p>The block's temporary file is handed to its volume via {@code addBlock}, the {@code
   * volumeMap} entry is replaced by one pointing at the file that call returns, and the block is
   * removed from {@code ongoingCreates}.
   *
   * @param b block to finalize
   * @throws IOException if the block is not in {@code ongoingCreates}, its temporary file is
   *     missing, no volume is recorded for it, or the volume fails to add the block
   */
  public synchronized void finalizeBlock(Block b) throws IOException {
    ActiveFile activeFile = ongoingCreates.get(b);
    if (activeFile == null) {
      throw new IOException("Block " + b + " is already finalized.");
    }
    File f = activeFile.file;
    if (f == null || !f.exists()) {
      throw new IOException("No temporary file " + f + " for block " + b);
    }
    // A missing volumeMap entry is reported through the "No volume" error below instead of
    // failing with a NullPointerException.
    FSVolume v = (volumeMap.get(b) == null) ? null : volumeMap.get(b).getVolume();
    if (v == null) {
      throw new IOException("No volume for temporary file " + f + " for block " + b);
    }

    File dest = v.addBlock(b, f);
    volumeMap.put(b, new DatanodeBlockInfo(v, dest));
    ongoingCreates.remove(b);
  }
    /**
     * Orders two paths by the timestamps recorded for them in {@code hmTimestamps}.
     *
     * @param path1 first path; cast to {@code Path}
     * @param path2 second path; cast to {@code Path}
     * @return 1 if the first timestamp is larger, -1 if it is smaller, 0 if both are equal
     */
    public int compare(Object path1, Object path2) {
      final long first = hmTimestamps.get((Path) path1).longValue();
      final long second = hmTimestamps.get((Path) path2).longValue();

      if (first == second) {
        return 0;
      }
      return (first > second) ? 1 : -1;
    }
 /**
  * {@inheritDoc}
  *
  * <p>Verifies, in this order, that the block is present in {@code volumeMap}, that a block file
  * exists on disk with the length stored in the block, that a non-empty meta file exists, and
  * that the generation stamp parsed from the meta file equals the block's. The first failed check
  * raises an {@code IOException} describing it.
  */
 public void validateBlockMetadata(Block b) throws IOException {
   final DatanodeBlockInfo entry = volumeMap.get(b);
   if (entry == null) {
     throw new IOException("Block " + b + " does not exist in volumeMap.");
   }

   // Use the volume's tmp file when getFile() has no file for the block.
   final File tmpFile = entry.getVolume().getTmpFile(b);
   final File knownFile = getFile(b);
   final File blockFile = (knownFile != null) ? knownFile : tmpFile;

   // --- block file checks ---
   if (blockFile == null) {
     throw new IOException("Block " + b + " does not exist on disk.");
   }
   if (!blockFile.exists()) {
     throw new IOException(
         "Block " + b + " block file " + blockFile + " does not exist on disk.");
   }
   if (b.getNumBytes() != blockFile.length()) {
     throw new IOException(
         "Block "
             + b
             + " length is "
             + b.getNumBytes()
             + " does not match block file length "
             + blockFile.length());
   }

   // --- meta file checks ---
   final File metaFile = getMetaFile(blockFile, b);
   if (metaFile == null) {
     throw new IOException("Block " + b + " metafile does not exist.");
   }
   if (!metaFile.exists()) {
     throw new IOException("Block " + b + " metafile " + metaFile + " does not exist on disk.");
   }
   if (metaFile.length() == 0) {
     throw new IOException("Block " + b + " metafile " + metaFile + " is empty.");
   }

   // --- generation stamp check ---
   final long stampOnDisk = parseGenerationStamp(blockFile, metaFile);
   if (stampOnDisk != b.getGenerationStamp()) {
     throw new IOException(
         "Block "
             + b
             + " genstamp is "
             + b.getGenerationStamp()
             + " does not match meta file stamp "
             + stampOnDisk);
   }
 }
    /**
     * Adds every block file found under this directory to the given map.
     *
     * <p>The child directories are processed first (recursively), then the files listed directly
     * in {@code dir}. For each file accepted by {@code Block.isBlockFilename} an entry is stored
     * that maps a {@code Block} (built from the file, its length and its generation stamp) to a
     * {@code DatanodeBlockInfo} for the given volume and file.
     *
     * @param volumeMap map to fill
     * @param volume volume recorded in every entry that is added
     */
    void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) {
      if (children != null) {
        for (int idx = 0; idx != children.length; idx++) {
          children[idx].getVolumeMap(volumeMap, volume);
        }
      }

      // NOTE(review): File.listFiles() returns null on an I/O error or when dir is not a
      // directory; that case is not handled here - confirm it cannot occur.
      final File[] entries = dir.listFiles();
      for (File entry : entries) {
        if (!Block.isBlockFilename(entry)) {
          continue;
        }
        final long genStamp = getGenerationStampFromFile(entries, entry);
        final Block block = new Block(entry, entry.length(), genStamp);
        volumeMap.put(block, new DatanodeBlockInfo(volume, entry));
      }
    }
  /**
   * Returns handles to the block file and its metadata file.
   *
   * <p>The block file is the temporary file the block's volume reports for it. Both files are
   * opened read-only and positioned at the given offsets (offsets that are not positive leave the
   * file at its start). If anything fails after the first file has been opened, the files opened
   * so far are closed again before the exception propagates.
   *
   * @param b block to read
   * @param blkOffset position to seek to in the block file
   * @param ckoff position to seek to in the meta file
   * @return streams over the block file and the meta file
   * @throws IOException if the block is not in {@code volumeMap} or a file cannot be opened or
   *     positioned
   */
  public synchronized BlockInputStreams getTmpInputStreams(Block b, long blkOffset, long ckoff)
      throws IOException {

    DatanodeBlockInfo info = volumeMap.get(b);
    if (info == null) {
      throw new IOException("Block " + b + " does not exist in volumeMap.");
    }
    FSVolume v = info.getVolume();
    File blockFile = v.getTmpFile(b);

    RandomAccessFile blockInFile = new RandomAccessFile(blockFile, "r");
    RandomAccessFile metaInFile = null;
    boolean handedOut = false;
    try {
      if (blkOffset > 0) {
        blockInFile.seek(blkOffset);
      }
      File metaFile = getMetaFile(blockFile, b);
      metaInFile = new RandomAccessFile(metaFile, "r");
      if (ckoff > 0) {
        metaInFile.seek(ckoff);
      }
      BlockInputStreams streams =
          new BlockInputStreams(
              new FileInputStream(blockInFile.getFD()), new FileInputStream(metaInFile.getFD()));
      handedOut = true;
      return streams;
    } finally {
      // On failure nobody else holds the descriptors, so release them here. Close errors are
      // ignored on purpose: the original exception is the one worth reporting.
      if (!handedOut) {
        try {
          blockInFile.close();
        } catch (IOException ignored) {
          // best effort cleanup
        }
        if (metaInFile != null) {
          try {
            metaInFile.close();
          } catch (IOException ignored) {
            // best effort cleanup
          }
        }
      }
    }
  }
 /**
  * Sets the offsets at which the next write to the block's data and checksum streams will land.
  *
  * <p>The requested data offset is first compared with the current length of the block's
  * temporary file (read while holding this object's monitor); an offset beyond that length is
  * rejected. Both streams are expected to be {@code FileOutputStream}s and are cast accordingly.
  *
  * @param b block being written
  * @param streams streams whose channel positions are changed
  * @param dataOffset new position for the data stream
  * @param ckOffset new position for the checksum stream
  * @throws IOException if {@code dataOffset} exceeds the temporary file's length, or if
  *     repositioning a channel fails
  */
 public void setChannelPosition(Block b, BlockWriteStreams streams, long dataOffset, long ckOffset)
     throws IOException {
   final long tmpFileLength;
   synchronized (this) {
     tmpFileLength = volumeMap.get(b).getVolume().getTmpFile(b).length();
   }

   if (dataOffset > tmpFileLength) {
     throw new IOException(
         "Trying to change block file offset of block "
             + b
             + " to "
             + dataOffset
             + " but actual size of file is "
             + tmpFileLength);
   }

   // data stream first, checksum stream second
   final FileOutputStream dataStream = (FileOutputStream) streams.dataOut;
   dataStream.getChannel().position(dataOffset);

   final FileOutputStream checksumStream = (FileOutputStream) streams.checksumOut;
   checksumStream.getChannel().position(ckOffset);
 }
  /**
   * Start writing to a block file. If isRecovery is true and the block pre-exists, then we kill
   * all other threads that might be writing to this block, and then reopen the file.
   *
   * <p>Three cases are handled while holding this object's monitor:
   *
   * <ul>
   *   <li>not a recovery: a volume is picked and a new temporary file is created;
   *   <li>recovery of a block that is still being created: its existing temporary file is reused
   *       after the threads registered for it have been interrupted;
   *   <li>recovery of any other block: its block file and meta file are renamed into the
   *       temporary directory.
   * </ul>
   *
   * After the monitor is released, the interrupted threads are joined before the write streams
   * are created.
   *
   * @param b block to write to
   * @param isRecovery whether an existing block may be reopened
   * @return streams for the block file and its meta file
   * @throws BlockAlreadyExistsException if the block is valid or already being created and
   *     {@code isRecovery} is false
   * @throws IOException if files cannot be created or moved, or if waiting for the previous
   *     writers is interrupted
   */
  public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
    //
    // Make sure the block isn't a valid one - we're still creating it!
    //
    if (isValidBlock(b)) {
      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " is valid, and cannot be written to.");
      }
      // If the block was successfully finalized because all packets
      // were successfully processed at the Datanode but the ack for
      // some of the packets were not received by the client. The client
      // re-opens the connection and retries sending those packets.
      // The other reason is that an "append" is occurring to this block.
      detachBlock(b, 1);
    }
    long blockSize = b.getNumBytes();

    //
    // Serialize access to /tmp, and check if file already there.
    //
    File f = null;
    List<Thread> threads = null;
    synchronized (this) {
      //
      // Is it already in the create process?
      //
      ActiveFile activeFile = ongoingCreates.get(b);
      if (activeFile != null) {
        f = activeFile.file;
        threads = activeFile.threads;

        if (!isRecovery) {
          throw new BlockAlreadyExistsException(
              "Block "
                  + b
                  + " has already been started (though not completed), and thus cannot be created.");
        } else {
          for (Thread thread : threads) {
            thread.interrupt();
          }
        }
        ongoingCreates.remove(b);
      }
      FSVolume v = null;
      if (!isRecovery) {
        v = volumes.getNextVolume(blockSize);
        // create temporary file to hold block in the designated volume
        f = createTmpFile(v, b);
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else if (f != null) {
        DataNode.LOG.info("Reopen already-open Block for append " + b);
        // create or reuse temporary file to hold block in the
        // designated volume
        v = volumeMap.get(b).getVolume();
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else {
        // reopening block for appending to it.
        DataNode.LOG.info("Reopen Block for append " + b);
        v = volumeMap.get(b).getVolume();
        f = createTmpFile(v, b);
        File blkfile = getBlockFile(b);
        File oldmeta = getMetaFile(b);
        File newmeta = getMetaFile(f, b);

        // rename meta file to tmp directory
        DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
        if (!oldmeta.renameTo(newmeta)) {
          throw new IOException(
              "Block "
                  + b
                  + " reopen failed. "
                  + " Unable to move meta file  "
                  + oldmeta
                  + " to tmp dir "
                  + newmeta);
        }

        // rename block file to tmp directory
        DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
        if (!blkfile.renameTo(f)) {
          // the freshly created tmp file may be in the way; remove it and retry once
          if (!f.delete()) {
            throw new IOException(
                "Block " + b + " reopen failed. " + " Unable to remove file " + f);
          }
          if (!blkfile.renameTo(f)) {
            throw new IOException(
                "Block "
                    + b
                    + " reopen failed. "
                    + " Unable to move block file "
                    + blkfile
                    + " to tmp dir "
                    + f);
          }
        }
        volumeMap.put(b, new DatanodeBlockInfo(v));
      }
      if (f == null) {
        DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
        throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      }
      ongoingCreates.put(b, new ActiveFile(f, threads));
    }

    // Wait, outside the monitor, for the writers interrupted above to finish.
    try {
      if (threads != null) {
        for (Thread thread : threads) {
          thread.join();
        }
      }
    } catch (InterruptedException e) {
      // Keep the interruption as the cause so it is not lost from the stack trace.
      IOException ioe = new IOException("Recovery waiting for thread interrupted.");
      ioe.initCause(e);
      throw ioe;
    }

    //
    // Finally, allow a writer to the block file
    // REMIND - mjc - make this a filter stream that enforces a max
    // block size, so clients can't go crazy
    //
    File metafile = getMetaFile(f, b);
    DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
    DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
    return createBlockWriteStreams(f, metafile);
  }
  /**
   * Try to update an old block to a new block. If there are ongoing create threads running for the
   * old block, the threads will be returned without updating the block.
   *
   * <p>The requested generation stamp and length are validated before any file is renamed, so a
   * rejected update leaves the meta file under its original name. An accepted update renames the
   * meta file to a temporary name, truncates the block if the new length is smaller, renames the
   * meta file to the name for the new block, and re-keys {@code ongoingCreates} and {@code
   * volumeMap}.
   *
   * @param oldblock block as currently stored
   * @param newblock block carrying the new generation stamp and length
   * @return ongoing create threads if there is any. Otherwise, return null.
   * @throws IOException if the block file does not exist, the generation stamp would decrease,
   *     the length would grow, a rename fails, or the final validation fails
   */
  private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock)
      throws IOException {
    // check ongoing create threads
    final ActiveFile activefile = ongoingCreates.get(oldblock);
    if (activefile != null && !activefile.threads.isEmpty()) {
      // remove dead threads
      for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) {
        final Thread t = i.next();
        if (!t.isAlive()) {
          i.remove();
        }
      }

      // return living threads
      if (!activefile.threads.isEmpty()) {
        return new ArrayList<Thread>(activefile.threads);
      }
    }

    // No ongoing create threads is alive. Update block.
    File blockFile = findBlockFile(oldblock.getBlockId());
    if (blockFile == null) {
      throw new IOException("Block " + oldblock + " does not exist.");
    }

    File oldMetaFile = findMetaFile(blockFile);
    long oldgs = parseGenerationStamp(blockFile, oldMetaFile);

    // Validate the request BEFORE touching the disk: throwing after the rename below would
    // strand the meta file under its temporary name.

    // the generation stamp must not go backwards
    if (oldgs > newblock.getGenerationStamp()) {
      throw new IOException(
          "Cannot update block (id="
              + newblock.getBlockId()
              + ") generation stamp from "
              + oldgs
              + " to "
              + newblock.getGenerationStamp());
    }

    // the block must not grow
    if (newblock.getNumBytes() > oldblock.getNumBytes()) {
      throw new IOException(
          "Cannot update block file (="
              + blockFile
              + ") length from "
              + oldblock.getNumBytes()
              + " to "
              + newblock.getNumBytes());
    }

    // rename meta file to a tmp file
    File tmpMetaFile =
        new File(
            oldMetaFile.getParent(),
            oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp());
    if (!oldMetaFile.renameTo(tmpMetaFile)) {
      throw new IOException("Cannot rename block meta file to " + tmpMetaFile);
    }

    // update length
    if (newblock.getNumBytes() < oldblock.getNumBytes()) {
      truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes());
    }

    // rename the tmp file to the new meta file (with new generation stamp)
    File newMetaFile = getMetaFile(blockFile, newblock);
    if (!tmpMetaFile.renameTo(newMetaFile)) {
      throw new IOException("Cannot rename tmp meta file to " + newMetaFile);
    }

    updateBlockMap(ongoingCreates, oldblock, newblock);
    updateBlockMap(volumeMap, oldblock, newblock);

    // paranoia! verify that the contents of the stored block
    // matches the block file on disk.
    validateBlockMetadata(newblock);
    return null;
  }
 /**
  * Deletes blocks that are no longer valid, right away rather than lazily.
  *
  * <p>For each block the {@code volumeMap} entry is validated and removed while holding this
  * object's monitor; the block file and its meta file are then deleted outside of it and the
  * volume's usage counter is reduced by their combined length. A block that cannot be processed
  * is logged and skipped, and the remaining blocks are still attempted.
  *
  * @param invalidBlks blocks to delete
  * @throws IOException after all blocks were attempted, if at least one of them failed
  */
 public void invalidate(Block invalidBlks[]) throws IOException {
   boolean failed = false;

   for (Block blk : invalidBlks) {
     File blockFile;
     FSVolume volume;

     synchronized (this) {
       blockFile = getFile(blk);
       final DatanodeBlockInfo entry = volumeMap.get(blk);
       if (entry == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + blk
                 + ". BlockInfo not found in volumeMap.");
         failed = true;
         continue;
       }

       volume = entry.getVolume();
       if (blockFile == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + blk
                 + ". Block not found in blockMap."
                 + ((volume == null) ? " " : " Block found in volumeMap."));
         failed = true;
         continue;
       }
       if (volume == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + blk
                 + ". No volume for this block."
                 + " Block found in blockMap. "
                 + blockFile
                 + ".");
         failed = true;
         continue;
       }

       final File parentDir = blockFile.getParentFile();
       if (parentDir == null) {
         DataNode.LOG.warn(
             "Unexpected error trying to delete block "
                 + blk
                 + ". Parent not found for file "
                 + blockFile
                 + ".");
         failed = true;
         continue;
       }

       // all checks passed: forget the block in memory
       volume.clearPath(parentDir);
       volumeMap.remove(blk);
     }

     // The file deletion itself happens without holding the monitor.
     final File metaFile = getMetaFile(blockFile, blk);
     final long freedBytes = blockFile.length() + metaFile.length();

     // The meta file only counts as a failure if it is still there after the delete attempt.
     final boolean removed = blockFile.delete() && (metaFile.delete() || !metaFile.exists());
     if (!removed) {
       DataNode.LOG.warn(
           "Unexpected error trying to delete block " + blk + " at file " + blockFile);
       failed = true;
       continue;
     }

     volume.decDfsUsed(freedBytes);
     DataNode.LOG.info("Deleting block " + blk + " file " + blockFile);
     if (blockFile.exists()) {
       //
       // This is a temporary check especially for hadoop-1220.
       // This will go away in the future.
       //
       DataNode.LOG.info("File " + blockFile + " was deleted but still exists!");
     }
   }

   if (failed) {
     throw new IOException("Error in deleting blocks.");
   }
 }
  /**
   * Walks an HDFS path recursively and collects the files to back up.
   *
   * <p>For every directory that is not on the ignore list, a {@code mkdir} command (except for
   * the root directory) plus {@code chown}/{@code chmod} commands are written, and its children
   * are visited. For every file that is not ignored, {@code chown}/{@code chmod} commands are
   * written; if its modification time (in seconds) lies within {@code [minDate, maxDate]} the file
   * is added to {@code pathList}, its timestamp to {@code hmTimestamps}, and its length to the
   * running byte total. The {@code chmod} mode is written in octal for directories and files.
   *
   * <p>An {@code IOException} is reported on standard error and ends the processing of {@code p}
   * only; it is not propagated.
   *
   * @param fs file system to inspect
   * @param minDate oldest modification time (seconds) for files to be backed up
   * @param maxDate newest modification time (seconds) for files to be backed up
   * @param p path to inspect
   * @param pathList receives every file selected for backup
   * @param hmTimestamps receives the modification time (seconds) of each selected file, for
   *     later sorting
   */
  public void checkDir(
      FileSystem fs,
      long minDate,
      long maxDate,
      Path p,
      ArrayList<Path> pathList,
      HashMap<Path, Long> hmTimestamps) {
    try {
      String sPath = p.toUri().getPath();

      // one status lookup serves the directory test and the owner/mode output below
      FileStatus stat = fs.getFileStatus(p);

      if (stat.isDir()) {
        // ignore certain directories
        if ("dfstmp".equals(p.getName())
            || "tmp".equals(p.getName())
            || "jobtracker".equals(p.getName())
            || sPath.startsWith("/mapred")
            || "ops".equals(p.getName())
            || p.getName().startsWith("_distcp_logs")) {
          return;
        }

        // dump the mkdir and chown/chmod commands for this
        // directory -- mkdir is skipped for the root directory only
        if (!sPath.equals("/")) {
          m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
        }
        writeOwnershipCommands(stat, sPath);

        // Do a recursive call to all elements
        FileStatus[] fStat = fs.listStatus(p);
        if (fStat != null) {
          for (int i = 0; i < fStat.length; i++) {
            checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
          }
        }
      } else {
        // If not a directory then we've found a file

        // ignore crc files
        if (p.getName().endsWith(".crc")) {
          return;
        }

        // ignore other files
        if (sPath.startsWith("/user/oozie/etl/workflows/")) {
          return;
        }

        // Files belonging to ignored tables are excluded from the backup; only the first few
        // are reported to keep the output short.
        if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
          m_nIgnoredTables++;

          if (m_nIgnoredTables < 5) {
            System.out.println("Skipping ignore-table file: " + sPath);
          } else if (m_nIgnoredTables == 5) {
            System.out.println("(...not showing other skipped tables...)");
          }
          return;
        }

        // modification time is in milliseconds; the date limits are in seconds
        long tmpDate = stat.getModificationTime() / 1000;

        // store the chmods/chowns for all files, even those outside the date window
        writeOwnershipCommands(stat, sPath);

        // check dates.  is the file too old?
        if (tmpDate < minDate) {
          return;
        }

        // is the file too recent?
        if (tmpDate > maxDate) {
          return;
        }

        // file timestamp is ok
        pathList.add(p);

        hmTimestamps.put(p, Long.valueOf(tmpDate));

        // store info about total bytes needed to backup
        m_nTotalBytes += fs.getContentSummary(p).getLength();
      }
    } catch (IOException e) {
      System.err.println("ERROR: could not open " + p + ": " + e);
    }
  }

  /**
   * Writes the chown and chmod commands that reproduce a path's owner, group and mode.
   *
   * <p>The mode is printed in octal, which is the numeric form {@code chmod} expects.
   */
  private void writeOwnershipCommands(FileStatus stat, String sPath) {
    m_wrChmods.println(
        "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);
    m_wrChmods.println(
        "hadoop fs -chmod " + Long.toOctalString(stat.getPermission().toShort()) + " " + sPath);
  }