示例#1
0
  /**
   * Prepares this reducer before any records are reduced.
   *
   * <p>Restores {@link Settings} and {@link Project} from their serialized forms in the job
   * configuration, writes the metadata names file locally when the project runs in the Hadoop
   * environment, configures {@code columnMetadata}, emits the header line, opens the zip writer
   * and initializes the Lucene index.
   *
   * @param context the reducer context; kept in {@code this.context} and used to read the job
   *     configuration and to emit the header line
   * @throws IOException declared by the overridden method; may be raised by the file write or by
   *     the context write
   * @throws InterruptedException declared by the overridden method
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void setup(Reducer.Context context) throws IOException, InterruptedException {
    this.context = context;

    // Settings arrive serialized in the job configuration; install them globally.
    Settings settings =
        Settings.loadFromString(context.getConfiguration().get(ParameterProcessing.SETTINGS_STR));
    Settings.setSettings(settings);

    Project project =
        Project.loadFromString(context.getConfiguration().get(ParameterProcessing.PROJECT));

    if (project.isEnvHadoop()) {
      // Recreate the metadata names file on this node from the copy carried in the configuration.
      // This must happen before the ColumnMetadata instance is created below
      // (presumably its constructor reads this file - confirm against ColumnMetadata).
      String metadataText = context.getConfiguration().get(ParameterProcessing.METADATA_FILE);
      File metadataNames = new File(ColumnMetadata.metadataNamesFile);
      metadataNames.getParentFile().mkdirs();
      Files.write(metadataText.getBytes(), metadataNames);
    }

    columnMetadata = new ColumnMetadata();
    char delimiter = Delimiter.getDelim(project.getFieldSeparator());
    columnMetadata.setFieldSeparator(String.valueOf(delimiter));
    columnMetadata.setAllMetadata(project.getMetadataCollect());

    // write standard metadata fields: the header line goes out with a null key
    Text headerLine = new Text(columnMetadata.delimiterSeparatedHeaders());
    context.write(null, headerLine);

    zipFileWriter.setup();
    zipFileWriter.openZipForWriting();

    luceneIndex = new LuceneIndex(settings.getLuceneIndexDir(), project.getProjectCode(), null);
    luceneIndex.init();
  }
示例#2
0
  /**
   * Finishes the reduce task.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>unless the project collects only standard metadata, writes a final header line keyed by
   *       {@code "Hash"};
   *   <li>closes the zip file;
   *   <li>merges the Lucene index when indexing is enabled;
   *   <li>in the Hadoop environment, copies the zip file to HDFS or uploads it to S3 under a name
   *       derived from the task attempt id;
   *   <li>marks the job as finished in {@link Stats}.
   * </ol>
   *
   * @param context the reducer context, used to emit the header line and to obtain the task
   *     attempt id
   * @throws IOException declared by the overridden method; may be raised by the context write
   * @throws InterruptedException declared by the overridden method
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
    if (!Project.getProject().isMetadataCollectStandard()) {
      // write summary headers with all metadata, but for standard metadata don't write the last
      // line
      context.write(new Text("Hash"), new Text(columnMetadata.delimiterSeparatedHeaders()));
    }
    zipFileWriter.closeZip();

    if (Project.getProject().isLuceneIndexEnabled()) {
      mergeLuceneIndex();
    }

    Project project = Project.getProject();
    if (project.isEnvHadoop()) {
      String outputPath = project.getProperty(ParameterProcessing.OUTPUT_DIR_HADOOP);
      String zipFileName = zipFileWriter.getZipFileName();
      if (project.isFsHdfs()) {
        // HDFS paths always use '/', independent of the local OS separator.
        String cmd =
            "hadoop fs -copyFromLocal "
                + zipFileName
                + " "
                + outputPath
                + "/"
                + context.getTaskAttemptID()
                + ".zip";
        OsUtil.runCommand(cmd);
      } else if (project.isFsS3()) {
        S3Agent s3agent = new S3Agent();
        String run = project.getRun();
        if (!run.isEmpty()) {
          run = run + "/";
        }
        // S3 keys use '/' as the delimiter; the rest of this key already hard-codes it,
        // so the local File.separator must not be mixed in.
        String s3key =
            project.getProjectCode()
                + "/"
                + "output/"
                + run
                + "results/"
                + context.getTaskAttemptID()
                + ".zip";
        // Keep updating the hadoop progress while the upload runs: the timer notifies this
        // object every refreshInterval milliseconds
        // (assumes javax.swing.Timer with this class as its ActionListener - confirm).
        final int refreshInterval = 60000;
        Timer timer = new Timer(refreshInterval, this);
        timer.start();
        try {
          s3agent.putFileInS3(zipFileName, s3key);
        } finally {
          // Stop the timer even when the upload fails, so it does not keep firing afterwards.
          timer.stop();
        }
      }
    }
    Stats.getInstance().setJobFinished();
  }