Пример #1
0
  @Override
  @SuppressWarnings("unchecked")
  protected void cleanup(Reducer.Context context) throws IOException, InterruptedException {
    if (!Project.getProject().isMetadataCollectStandard()) {
      // write summary headers with all metadata, but for standard metadata don't write the last
      // line
      context.write(new Text("Hash"), new Text(columnMetadata.delimiterSeparatedHeaders()));
    }
    zipFileWriter.closeZip();

    if (Project.getProject().isLuceneIndexEnabled()) {
      mergeLuceneIndex();
    }

    Project project = Project.getProject();
    if (project.isEnvHadoop()) {
      String outputPath = Project.getProject().getProperty(ParameterProcessing.OUTPUT_DIR_HADOOP);
      String zipFileName = zipFileWriter.getZipFileName();
      if (project.isFsHdfs()) {
        String cmd =
            "hadoop fs -copyFromLocal "
                + zipFileName
                + " "
                + outputPath
                + File.separator
                + context.getTaskAttemptID()
                + ".zip";
        OsUtil.runCommand(cmd);
      } else if (project.isFsS3()) {
        S3Agent s3agent = new S3Agent();
        String run = project.getRun();
        if (!run.isEmpty()) {
          run = run + "/";
        }
        String s3key =
            project.getProjectCode()
                + File.separator
                + "output/"
                + run
                + "results/"
                + context.getTaskAttemptID()
                + ".zip";
        // Keep updating the hadoop progress
        int refreshInterval = 60000;
        Timer timer = new Timer(refreshInterval, this);
        timer.start();
        s3agent.putFileInS3(zipFileName, s3key);
        timer.stop();
      }
    }
    Stats.getInstance().setJobFinished();
  }
Пример #2
0
  public static synchronized SolrIndex getInstance() {
    if (__instance == null) {
      if (Project.getProject().isSendIndexToSolrEnabled()) {
        __instance = new HttpSolrIndex();
      } else {
        __instance = new DisabledSolrIndex();
      }
    }

    return __instance;
  }
Пример #3
0
  @Override
  @SuppressWarnings("unchecked")
  protected void setup(Reducer.Context context) throws IOException, InterruptedException {
    this.context = context;
    String settingsStr = context.getConfiguration().get(ParameterProcessing.SETTINGS_STR);
    Settings settings = Settings.loadFromString(settingsStr);
    Settings.setSettings(settings);

    String projectStr = context.getConfiguration().get(ParameterProcessing.PROJECT);
    Project project = Project.loadFromString(projectStr);
    if (project.isEnvHadoop()) {
      String metadataFileContents =
          context.getConfiguration().get(ParameterProcessing.METADATA_FILE);
      new File(ColumnMetadata.metadataNamesFile).getParentFile().mkdirs();
      Files.write(metadataFileContents.getBytes(), new File(ColumnMetadata.metadataNamesFile));
    }
    columnMetadata = new ColumnMetadata();
    String fileSeparatorStr = project.getFieldSeparator();
    char fieldSeparatorChar = Delimiter.getDelim(fileSeparatorStr);
    columnMetadata.setFieldSeparator(String.valueOf(fieldSeparatorChar));
    columnMetadata.setAllMetadata(project.getMetadataCollect());
    // write standard metadata fields
    context.write(null, new Text(columnMetadata.delimiterSeparatedHeaders()));
    zipFileWriter.setup();
    zipFileWriter.openZipForWriting();

    luceneIndex = new LuceneIndex(settings.getLuceneIndexDir(), project.getProjectCode(), null);
    luceneIndex.init();
  }
Пример #4
0
  private void mergeLuceneIndex() throws IOException {
    String luceneDir = Settings.getSettings().getLuceneIndexDir();
    String hdfsLuceneDir =
        "/" + luceneDir + File.separator + Project.getProject().getProjectCode() + File.separator;

    String localLuceneTempDir = luceneDir + File.separator + "tmp" + File.separator;
    File localLuceneTempDirFile = new File(localLuceneTempDir);

    if (localLuceneTempDirFile.exists()) {
      Util.deleteDirectory(localLuceneTempDirFile);
    }

    localLuceneTempDirFile.mkdir();

    // copy all zip lucene indexes, created by maps to local hd
    String cmd = "hadoop fs -copyToLocal " + hdfsLuceneDir + "* " + localLuceneTempDir;
    OsUtil.runCommand(cmd);

    // remove the map indexes as they are now copied to local
    String removeOldZips = "hadoop fs -rm " + hdfsLuceneDir + "*";
    OsUtil.runCommand(removeOldZips);

    logger.trace("Lucene index files collected to: {}", localLuceneTempDirFile.getAbsolutePath());

    String[] zipFilesArr = localLuceneTempDirFile.list();
    for (String indexZipFileStr : zipFilesArr) {
      String indexZipFileName = localLuceneTempDir + indexZipFileStr;
      String unzipToDir = localLuceneTempDir + indexZipFileStr.replace(".zip", "");

      ZipUtil.unzipFile(indexZipFileName, unzipToDir);
      File indexDir = new File(unzipToDir);

      FSDirectory fsDir = FSDirectory.open(indexDir);
      luceneIndex.addToIndex(fsDir);
    }
    // TODO check if we need to push the index to S3 or somewhere else
    luceneIndex.destroy();
  }
Пример #5
0
    @Override
    public void init() {
      try {
        String endpoint = getSolrEndpoint();

        if (supportMultipleProjects) {
          String projectCode = Project.getProject().getProjectCode();
          String command =
              endpoint
                  + "solr/admin/cores?action=CREATE&name="
                  + SOLR_INSTANCE_DIR
                  + "_"
                  + projectCode
                  + "&instanceDir="
                  + SOLR_INSTANCE_DIR
                  + "&config=solrconfig.xml&dataDir=data_"
                  + projectCode
                  + "&schema=schema.xml";
          sendGetCommand(command);

          this.updateUrl = endpoint + "solr/" + SOLR_INSTANCE_DIR + "_" + projectCode + "/update";
        } else {
          sendGetCommand(endpoint + "solr/admin/ping");

          this.updateUrl = endpoint + "solr/update";
        }

        String deleteAll = "<delete><query>id:[*TO *]</query></delete>";
        sendPostCommand(updateUrl, deleteAll);
        sendPostCommand(updateUrl, "<commit/>");

      } catch (SolrException se) {
        History.appendToHistory("Problem with SOLR init: " + se.getMessage());
        // se.printStackTrace();
      }
    }