Example #1
  public static LinkedHashSet<String> findJars(LogicalPlan dag, Class<?>[] defaultClasses) {
    List<Class<?>> jarClasses = new ArrayList<Class<?>>();

    for (String className : dag.getClassNames()) {
      try {
        Class<?> clazz = Thread.currentThread().getContextClassLoader().loadClass(className);
        jarClasses.add(clazz);
      } catch (ClassNotFoundException e) {
        throw new IllegalArgumentException("Failed to load class " + className, e);
      }
    }

    for (Class<?> clazz : Lists.newArrayList(jarClasses)) {
      // process class and super classes (super does not require deploy annotation)
      for (Class<?> c = clazz; c != Object.class && c != null; c = c.getSuperclass()) {
        // LOG.debug("checking " + c);
        jarClasses.add(c);
        jarClasses.addAll(Arrays.asList(c.getInterfaces()));
      }
    }

    jarClasses.addAll(Arrays.asList(defaultClasses));

    if (dag.isDebug()) {
      LOG.debug("Deploy dependencies: {}", jarClasses);
    }

    LinkedHashSet<String> localJarFiles = new LinkedHashSet<String>(); // avoid duplicates
    HashMap<String, String> sourceToJar = new HashMap<String, String>();

    for (Class<?> jarClass : jarClasses) {
      if (jarClass.getProtectionDomain().getCodeSource() == null) {
        // system class
        continue;
      }
      String sourceLocation =
          jarClass.getProtectionDomain().getCodeSource().getLocation().toString();
      String jar = sourceToJar.get(sourceLocation);
      if (jar == null) {
        // don't create jar file from folders multiple times
        jar = JarFinder.getJar(jarClass);
        sourceToJar.put(sourceLocation, jar);
        LOG.debug("added sourceLocation {} as {}", sourceLocation, jar);
      }
      if (jar == null) {
        throw new AssertionError("Cannot resolve jar file for " + jarClass);
      }
      localJarFiles.add(jar);
    }

    String libJarsPath = dag.getValue(LogicalPlan.LIBRARY_JARS);
    if (!StringUtils.isEmpty(libJarsPath)) {
      String[] libJars = StringUtils.splitByWholeSeparator(libJarsPath, LIB_JARS_SEP);
      localJarFiles.addAll(Arrays.asList(libJars));
    }

    LOG.info("Local jar file dependencies: " + localJarFiles);

    return localJarFiles;
  }
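The heart of findJars is resolving each class back to the jar or class folder it was loaded from via its ProtectionDomain, de-duplicated by code-source location. A minimal standalone sketch of just that lookup, using only the JDK (the method name resolveCodeSources is illustrative, not part of the original code):

  // Sketch; assumes imports of java.util.LinkedHashSet and java.security.CodeSource.
  static LinkedHashSet<String> resolveCodeSources(Class<?>... classes) {
    LinkedHashSet<String> locations = new LinkedHashSet<String>(); // insertion order, no duplicates
    for (Class<?> clazz : classes) {
      CodeSource source = clazz.getProtectionDomain().getCodeSource();
      if (source == null) {
        continue; // bootstrap/system classes (e.g. java.lang.String) have no code source
      }
      locations.add(source.getLocation().toString());
    }
    return locations;
  }

JarFinder.getJar(...) additionally packages a class-folder location into a jar on the fly, which is why findJars caches results by sourceLocation so that jar is not rebuilt for every class coming from the same folder.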
  public int run(String[] args) throws Exception {
    Configuration argConf = getConf();

    // JobConf conf = new JobConf(diffdb.class);
    Configuration config = HBaseConfiguration.create();
    HBaseAdmin hbAdmin = new HBaseAdmin(config);
    dbutil db_util = new dbutil(config);

    HTable runTable = new HTable(config, "gestore_runs");
    Get runGet = new Get(argConf.get("id").getBytes());
    Result pipeline = runTable.get(runGet);

    NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes());

    Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry();

    HashMap<String, HashMap<String, String>> resultMap =
        new HashMap<String, HashMap<String, String>>();

    while (results != null) {
      String resultKey = new String(results.getKey());
      String resultValue = new String(results.getValue());
      String field = "type";
      HashMap<String, String> tempMap = new HashMap<String, String>();
      String entry = resultKey;

      if (resultKey.endsWith("_db_timestamp")) {
        field = "db_timestamp";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp"));
      } else if (resultKey.endsWith("_filename")) {
        field = "filename";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_filename"));
      } else if (resultKey.endsWith("_regex")) {
        field = "regex";
        entry = resultKey.substring(0, resultKey.lastIndexOf("_regex"));
      }

      if (resultMap.containsKey(entry)) {
        tempMap = resultMap.get(entry);
      }

      tempMap.put(field, resultValue);
      resultMap.put(entry, tempMap);

      // System.out.println("Key: " + resultKey + " Value: " + resultValue);
      results = pipeMap.pollFirstEntry();
    }

    for (String key : resultMap.keySet()) {
      System.out.println("File ID: " + key);
      for (String subKey : resultMap.get(key).keySet()) {
        // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey));
        System.out.format("  %1$-20s  %2$s\n", subKey, resultMap.get(key).get(subKey));
      }
    }

    runTable.close();

    return 0;
  }
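The while loop above demultiplexes qualifiers of the form <entry>_db_timestamp, <entry>_filename and <entry>_regex into a per-entry map, with anything else treated as the entry's "type". That suffix handling can be isolated into a small helper; a rough sketch (splitQualifier is a hypothetical name, not part of the original class):

  // Sketch: split a qualifier into { entry, field } based on a known suffix.
  static String[] splitQualifier(String key) {
    String[] suffixes = {"_db_timestamp", "_filename", "_regex"};
    for (String suffix : suffixes) {
      if (key.endsWith(suffix)) {
        // strip the suffix to get the entry, drop the leading '_' to get the field name
        return new String[] {key.substring(0, key.length() - suffix.length()), suffix.substring(1)};
      }
    }
    return new String[] {key, "type"}; // default branch, mirroring field = "type" above
  }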
  /** Populates volumeMap with an entry for every block file found under this directory tree. */
  void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) {
    if (children != null) {
      for (int i = 0; i < children.length; i++) {
        children[i].getVolumeMap(volumeMap, volume);
      }
    }

    File[] blockFiles = dir.listFiles();
    if (blockFiles == null) {
      // listFiles() returns null if dir does not exist or an I/O error occurs
      return;
    }
    for (int i = 0; i < blockFiles.length; i++) {
      if (Block.isBlockFilename(blockFiles[i])) {
        long genStamp = getGenerationStampFromFile(blockFiles, blockFiles[i]);
        volumeMap.put(
            new Block(blockFiles[i], blockFiles[i].length(), genStamp),
            new DatanodeBlockInfo(volume, blockFiles[i]));
      }
    }
  }
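getVolumeMap is a plain recursive directory walk: recurse into children, then register every file that looks like a block. The same traversal pattern with nothing Hadoop-specific, for reference (collectFiles is an illustrative name):

  // Sketch: recursively collect regular files under dir; listFiles() may return null.
  static void collectFiles(java.io.File dir, java.util.List<java.io.File> out) {
    java.io.File[] entries = dir.listFiles();
    if (entries == null) {
      return; // not a directory, or an I/O error occurred
    }
    for (java.io.File entry : entries) {
      if (entry.isDirectory()) {
        collectFiles(entry, out); // like children[i].getVolumeMap(volumeMap, volume)
      } else {
        out.add(entry); // like the Block.isBlockFilename(...) branch
      }
    }
  }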
  /** Complete the block write! */
  public synchronized void finalizeBlock(Block b) throws IOException {
    ActiveFile activeFile = ongoingCreates.get(b);
    if (activeFile == null) {
      throw new IOException("Block " + b + " is already finalized.");
    }
    File f = activeFile.file;
    if (f == null || !f.exists()) {
      throw new IOException("No temporary file " + f + " for block " + b);
    }
    DatanodeBlockInfo blockInfo = volumeMap.get(b);
    FSVolume v = (blockInfo == null) ? null : blockInfo.getVolume();
    if (v == null) {
      throw new IOException("No volume for temporary file " + f + " for block " + b);
    }

    File dest = v.addBlock(b, f);
    volumeMap.put(b, new DatanodeBlockInfo(v, dest));
    ongoingCreates.remove(b);
  }
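finalizeBlock promotes the temporary file into its volume and then moves the bookkeeping entry from ongoingCreates to volumeMap. The same promote-then-record pattern expressed with java.nio.file, purely as an illustration (promote, inProgress and finalized are hypothetical names):

  // Sketch; assumes imports of java.io.IOException, java.nio.file.Files, java.nio.file.Path, java.util.Map.
  static void promote(String id, Map<String, Path> inProgress, Map<String, Path> finalized,
      Path finalDir) throws IOException {
    Path tmp = inProgress.get(id);
    if (tmp == null) {
      throw new IOException("Entry " + id + " is already finalized.");
    }
    Path dest = Files.move(tmp, finalDir.resolve(tmp.getFileName())); // move into the final location
    finalized.put(id, dest);   // like volumeMap.put(b, new DatanodeBlockInfo(v, dest))
    inProgress.remove(id);     // like ongoingCreates.remove(b)
  }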
  /**
   * Start writing to a block file. If isRecovery is true and the block pre-exists, then we kill
   * all other threads that might be writing to this block, and then reopen the file.
   */
  public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
    //
    // Make sure the block isn't a valid one - we're still creating it!
    //
    if (isValidBlock(b)) {
      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " is valid, and cannot be written to.");
      }
      // The block may have been successfully finalized because all packets were processed
      // at the Datanode, but the acks for some packets never reached the client; the client
      // then re-opens the connection and retries sending those packets.
      // The other reason is that an "append" is occurring to this block.
      detachBlock(b, 1);
    }
    long blockSize = b.getNumBytes();

    //
    // Serialize access to /tmp, and check if file already there.
    //
    File f = null;
    List<Thread> threads = null;
    synchronized (this) {
      //
      // Is it already in the create process?
      //
      ActiveFile activeFile = ongoingCreates.get(b);
      if (activeFile != null) {
        f = activeFile.file;
        threads = activeFile.threads;

        if (!isRecovery) {
          throw new BlockAlreadyExistsException(
              "Block "
                  + b
                  + " has already been started (though not completed), and thus cannot be created.");
        } else {
          for (Thread thread : threads) {
            thread.interrupt();
          }
        }
        ongoingCreates.remove(b);
      }
      FSVolume v = null;
      if (!isRecovery) {
        v = volumes.getNextVolume(blockSize);
        // create temporary file to hold block in the designated volume
        f = createTmpFile(v, b);
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else if (f != null) {
        DataNode.LOG.info("Reopen already-open Block for append " + b);
        // create or reuse temporary file to hold block in the
        // designated volume
        v = volumeMap.get(b).getVolume();
        volumeMap.put(b, new DatanodeBlockInfo(v));
      } else {
        // reopening block for appending to it.
        DataNode.LOG.info("Reopen Block for append " + b);
        v = volumeMap.get(b).getVolume();
        f = createTmpFile(v, b);
        File blkfile = getBlockFile(b);
        File oldmeta = getMetaFile(b);
        File newmeta = getMetaFile(f, b);

        // rename meta file to tmp directory
        DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
        if (!oldmeta.renameTo(newmeta)) {
          throw new IOException(
              "Block "
                  + b
                  + " reopen failed. "
                  + " Unable to move meta file  "
                  + oldmeta
                  + " to tmp dir "
                  + newmeta);
        }

        // rename block file to tmp directory
        DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
        if (!blkfile.renameTo(f)) {
          if (!f.delete()) {
            throw new IOException(
                "Block " + b + " reopen failed. " + " Unable to remove file " + f);
          }
          if (!blkfile.renameTo(f)) {
            throw new IOException(
                "Block "
                    + b
                    + " reopen failed. "
                    + " Unable to move block file "
                    + blkfile
                    + " to tmp dir "
                    + f);
          }
        }
        volumeMap.put(b, new DatanodeBlockInfo(v));
      }
      if (f == null) {
        DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
        throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      }
      ongoingCreates.put(b, new ActiveFile(f, threads));
    }

    try {
      if (threads != null) {
        for (Thread thread : threads) {
          thread.join();
        }
      }
    } catch (InterruptedException e) {
      throw new IOException("Recovery waiting for thread interrupted.");
    }

    //
    // Finally, allow a writer to the block file
    // REMIND - mjc - make this a filter stream that enforces a max
    // block size, so clients can't go crazy
    //
    File metafile = getMetaFile(f, b);
    DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
    DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
    return createBlockWriteStreams(f, metafile);
  }
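The recovery path in writeToBlock interrupts any threads still writing to the block while holding the lock, then joins them after the synchronized block so the old writers have actually exited before new streams are handed out. A stripped-down sketch of that interrupt-then-join pattern (takeOver and writers are hypothetical stand-ins for ongoingCreates/ActiveFile):

  private final Object lock = new Object();
  private final java.util.Map<String, java.util.List<Thread>> writers =
      new java.util.HashMap<String, java.util.List<Thread>>();

  // Sketch: stop competing writers for an entry before reusing their resource.
  void takeOver(String id) throws java.io.IOException {
    java.util.List<Thread> old;
    synchronized (lock) {
      old = writers.remove(id); // unregister under the lock
      if (old != null) {
        for (Thread t : old) {
          t.interrupt(); // ask the old writers to stop
        }
      }
    }
    try {
      if (old != null) {
        for (Thread t : old) {
          t.join(); // wait outside the lock so the writers can finish cleanly
        }
      }
    } catch (InterruptedException e) {
      throw new java.io.IOException("Interrupted while waiting for old writers.");
    }
  }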
  /**
   * Walks the HDFS filesystem recursively to find all files eligible for backup.
   *
   * @param fs FileSystem object from HDFS
   * @param minDate oldest modification time (seconds) for files to be backed up
   * @param maxDate newest modification time (seconds) for files to be backed up
   * @param p path in HDFS to look for files under
   * @param pathList will be filled with all files found under p
   * @param hmTimestamps hashmap of file timestamps for later sorting
   */
  public void checkDir(
      FileSystem fs,
      long minDate,
      long maxDate,
      Path p,
      ArrayList<Path> pathList,
      HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
      String sPath = p.toUri().getPath();

      // If this is a directory
      if (fs.getFileStatus(p).isDir()) {
        // ignore certain directories
        if ("dfstmp".equals(p.getName())
            || "tmp".equals(p.getName())
            || "jobtracker".equals(p.getName())
            || sPath.startsWith("/mapred")
            || "ops".equals(p.getName())
            || p.getName().startsWith("_distcp_logs")) {
          return;
        }

        // dump the mkdir and chmod commands for this
        // directory -- skip root directory only
        {
          FileStatus stat = fs.getFileStatus(p);

          if (!sPath.equals("/")) {
            m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
          }

          m_wrChmods.println(
              "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

          m_wrChmods.println(
              "hadoop fs -chmod "
                  + Integer.toOctalString(stat.getPermission().toShort())
                  + " "
                  + sPath);
        }

        fStat = fs.listStatus(p);

        // Do a recursive call to all elements
        for (int i = 0; i < fStat.length; i++) {
          checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
        }
      } else {
        // If not a directory then we've found a file

        // ignore crc files
        if (p.getName().endsWith(".crc")) {
          return;
        }

        // ignore other files
        if (sPath.startsWith("/user/oozie/etl/workflows/")) {
          return;
        }

        // try to get the table name from the path. There are
        // various types of tables, from those replicated from
        // another database to regular hive tables to
        // partitioned hive tables.  We use table names to
        // both exclude some from the backup, and for the rest
        // to dump out the schema and partition name.
        if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
          m_nIgnoredTables++;

          if (m_nIgnoredTables < 5) {
            System.out.println("Skipping ignore-table file: " + sPath);
          } else if (m_nIgnoredTables == 5) {
            System.out.println("(...not showing other skipped tables...)");
          }
          return;
        }

        FileStatus stat = fs.getFileStatus(p);

        tmpDate = stat.getModificationTime() / 1000;

        // store the chmods/chowns for all files
        m_wrChmods.println(
            "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

        m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

        // check dates.  is the file too old?
        if (tmpDate < minDate) {
          return;
        }

        // is the file too recent?
        if (tmpDate > maxDate) {
          // System.out.println("file too recent: " + sPath);
          return;
        }

        // file timestamp is ok
        pathList.add(p);

        hmTimestamps.put(p, new Long(tmpDate));

        // store info about total bytes needed to back up
        m_nTotalBytes += fs.getContentSummary(p).getLength();
      }
    } catch (IOException e) {
      System.err.println("ERROR: could not open " + p + ": " + e);

      // System.exit(1) ;
    }
  }
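checkDir combines recursive traversal, name-based exclusions and a modification-time window measured in seconds. A rough equivalent of just the selection logic using java.nio.file instead of the Hadoop FileSystem API (collectInWindow is an illustrative name, and the exclusion list is trimmed to a single example):

  // Sketch; assumes imports of java.io.IOException, java.nio.file.*, java.util.List.
  static void collectInWindow(Path p, long minDate, long maxDate, List<Path> out) throws IOException {
    if (Files.isDirectory(p)) {
      if (p.getFileName() != null && "tmp".equals(p.getFileName().toString())) {
        return; // name-based exclusion, like the "dfstmp"/"tmp"/"jobtracker" checks above
      }
      try (DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
        for (Path child : children) {
          collectInWindow(child, minDate, maxDate, out); // recursive call, like checkDir above
        }
      }
    } else {
      long mtime = Files.getLastModifiedTime(p).toMillis() / 1000; // seconds, like tmpDate above
      if (mtime >= minDate && mtime <= maxDate) {
        out.add(p); // inside the backup window
      }
    }
  }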