Example #1
0
 /**
  * Deletes a directory together with everything underneath it.
  *
  * <p>On *nix this runs {@code rm -fr -- <dir>} as an external process, because of the known
  * problems with recursive deletes and because {@code rm -fr} is probably faster. On every other
  * platform it delegates to {@code FileUtils.deleteDirectory}.
  *
  * <p>The path is handed to {@code rm} as a single argument, so directory names that contain
  * whitespace are deleted as one path rather than being split into several.
  *
  * @param dir directory to delete.
  * @throws IOException on any problem with delete, including a non-zero exit code from
  *     {@code rm} or an interruption while waiting for it to finish.
  */
 public static void deleteDirectory(File dir) throws IOException {
   if (!OsUtil.isNix()) {
     FileUtils.deleteDirectory(dir);
     return;
   }

   String path = dir.getPath();
   // An argument list (not one concatenated command string) keeps the path intact even when
   // it contains spaces; "--" stops a path that starts with '-' from being read as an option.
   Process rm = new ProcessBuilder("rm", "-fr", "--", path).inheritIO().start();
   try {
     int exitCode = rm.waitFor();
     if (exitCode != 0) {
       throw new IOException("'rm -fr' exited with code " + exitCode + " for " + path);
     }
   } catch (InterruptedException e) {
     rm.destroy();
     // Restore the interrupt flag so callers further up can still observe the interruption.
     Thread.currentThread().interrupt();
     throw new IOException("Interrupted while deleting " + path, e);
   }
 }
  /**
   * Finishes this reducer: optionally writes the metadata header row, closes the result zip,
   * optionally merges the Lucene indexes and, when running on Hadoop, publishes the zip to HDFS
   * or S3 under the name {@code <task attempt id>.zip}. The job is marked as finished at the end.
   *
   * @param context reducer context used to write the header row and to obtain the task attempt id.
   * @throws IOException if writing, zipping, index merging or publishing the zip fails.
   * @throws InterruptedException if writing to the context is interrupted.
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
    Project project = Project.getProject();

    if (!project.isMetadataCollectStandard()) {
      // write summary headers with all metadata, but for standard metadata don't write the last
      // line
      context.write(new Text("Hash"), new Text(columnMetadata.delimiterSeparatedHeaders()));
    }
    zipFileWriter.closeZip();

    if (project.isLuceneIndexEnabled()) {
      mergeLuceneIndex();
    }

    if (project.isEnvHadoop()) {
      String zipFileName = zipFileWriter.getZipFileName();
      String remoteZipName = context.getTaskAttemptID() + ".zip";

      if (project.isFsHdfs()) {
        String outputPath = project.getProperty(ParameterProcessing.OUTPUT_DIR_HADOOP);
        // HDFS paths are '/'-separated regardless of the local platform's file separator.
        String cmd =
            "hadoop fs -copyFromLocal " + zipFileName + " " + outputPath + "/" + remoteZipName;
        OsUtil.runCommand(cmd);
      } else if (project.isFsS3()) {
        S3Agent s3agent = new S3Agent();
        String run = project.getRun();
        if (!run.isEmpty()) {
          run = run + "/";
        }
        // S3 keys use '/' as the delimiter, matching the literal "output/" and "results/" parts.
        String s3key = project.getProjectCode() + "/output/" + run + "results/" + remoteZipName;

        // Keep updating the hadoop progress
        int refreshInterval = 60000;
        Timer timer = new Timer(refreshInterval, this);
        timer.start();
        try {
          s3agent.putFileInS3(zipFileName, s3key);
        } finally {
          // Stop the timer even when the upload throws, so it does not keep firing afterwards.
          timer.stop();
        }
      }
    }
    Stats.getInstance().setJobFinished();
  }
  /**
   * Merges the zipped Lucene indexes stored in HDFS for the current project into
   * {@code luceneIndex}.
   *
   * <p>The zips are copied from HDFS into a freshly created local {@code tmp} directory under the
   * configured Lucene index directory and removed from HDFS. Each one is then unzipped and added
   * to {@code luceneIndex}, which is destroyed at the end.
   *
   * @throws IOException if the local temp directory cannot be prepared or listed, or if copying,
   *     unzipping or opening an index fails.
   */
  private void mergeLuceneIndex() throws IOException {
    String luceneDir = Settings.getSettings().getLuceneIndexDir();
    // HDFS paths are '/'-separated regardless of the local platform's file separator.
    String hdfsLuceneDir = "/" + luceneDir + "/" + Project.getProject().getProjectCode() + "/";

    String localLuceneTempDir = luceneDir + File.separator + "tmp" + File.separator;
    File localLuceneTempDirFile = new File(localLuceneTempDir);

    // Start from an empty temp directory so leftovers from an earlier run are not merged again.
    if (localLuceneTempDirFile.exists()) {
      Util.deleteDirectory(localLuceneTempDirFile);
    }
    if (!localLuceneTempDirFile.mkdirs() && !localLuceneTempDirFile.isDirectory()) {
      throw new IOException(
          "Cannot create Lucene temp directory: " + localLuceneTempDirFile.getAbsolutePath());
    }

    // copy all zip lucene indexes, created by maps to local hd
    String cmd = "hadoop fs -copyToLocal " + hdfsLuceneDir + "* " + localLuceneTempDir;
    OsUtil.runCommand(cmd);

    // remove the map indexes as they are now copied to local
    String removeOldZips = "hadoop fs -rm " + hdfsLuceneDir + "*";
    OsUtil.runCommand(removeOldZips);

    logger.trace("Lucene index files collected to: {}", localLuceneTempDirFile.getAbsolutePath());

    // File.list() returns null when the path is not a directory or cannot be read.
    String[] zipFilesArr = localLuceneTempDirFile.list();
    if (zipFilesArr == null) {
      throw new IOException(
          "Cannot list Lucene temp directory: " + localLuceneTempDirFile.getAbsolutePath());
    }

    for (String indexZipFileStr : zipFilesArr) {
      String indexZipFileName = localLuceneTempDir + indexZipFileStr;
      String unzipToDir = localLuceneTempDir + indexZipFileStr.replace(".zip", "");

      ZipUtil.unzipFile(indexZipFileName, unzipToDir);
      File indexDir = new File(unzipToDir);

      // NOTE(review): fsDir is never closed here; confirm whether addToIndex/destroy releases it.
      FSDirectory fsDir = FSDirectory.open(indexDir);
      luceneIndex.addToIndex(fsDir);
    }
    // TODO check if we need to push the index to S3 or somewhere else
    luceneIndex.destroy();
  }