@Override
  @SuppressWarnings("unchecked")
  protected void setup(Reducer.Context context) throws IOException, InterruptedException {
    this.context = context;
    // Rehydrate the global Settings singleton from the serialized form passed
    // through the Hadoop job configuration.
    String settingsStr = context.getConfiguration().get(ParameterProcessing.SETTINGS_STR);
    Settings settings = Settings.loadFromString(settingsStr);
    Settings.setSettings(settings);

    String projectStr = context.getConfiguration().get(ParameterProcessing.PROJECT);
    Project project = Project.loadFromString(projectStr);
    if (project.isEnvHadoop()) {
      // On a Hadoop cluster the metadata file does not exist locally; recreate
      // it from the contents shipped inside the job configuration.
      String metadataFileContents =
          context.getConfiguration().get(ParameterProcessing.METADATA_FILE);
      new File(ColumnMetadata.metadataNamesFile).getParentFile().mkdirs();
      // Use an explicit charset: the no-arg getBytes() uses the platform default,
      // which is not guaranteed to be the same on every cluster node.
      Files.write(
          metadataFileContents.getBytes(java.nio.charset.StandardCharsets.UTF_8),
          new File(ColumnMetadata.metadataNamesFile));
    }
    columnMetadata = new ColumnMetadata();
    String fileSeparatorStr = project.getFieldSeparator();
    char fieldSeparatorChar = Delimiter.getDelim(fileSeparatorStr);
    columnMetadata.setFieldSeparator(String.valueOf(fieldSeparatorChar));
    columnMetadata.setAllMetadata(project.getMetadataCollect());
    // write standard metadata fields
    context.write(null, new Text(columnMetadata.delimiterSeparatedHeaders()));
    zipFileWriter.setup();
    zipFileWriter.openZipForWriting();

    // Open the project's Lucene index for the duration of the reduce phase.
    luceneIndex = new LuceneIndex(settings.getLuceneIndexDir(), project.getProjectCode(), null);
    luceneIndex.init();
  }
// Beispiel #2
// 0
  /**
   * Downloads and unpacks the FreeEed distribution on the cluster's JobTracker node
   * over SSH, then writes an S3-flavored settings file intended for that node.
   *
   * @throws Exception if the SSH session or a remote command fails
   */
  private void installFreeEed() throws Exception {
    String url = Settings.getSettings().getDownloadLink();
    logger.info("Installing FreeEed software from " + url);
    // Remote shell pipeline: drop any old zip, fetch a fresh one, wipe the old
    // install directory, and unpack.
    StringBuilder remoteCmd = new StringBuilder();
    remoteCmd
        .append("rm FreeEed.zip; ")
        .append("wget ")
        .append(url)
        .append(" -O FreeEed.zip --no-check-certificate; ")
        .append("rm -fr FreeEed; ")
        .append("unzip -P 4ushH7XZT1 FreeEed.zip");
    SSHAgent sshAgent = new SSHAgent();
    sshAgent.setUser(ParameterProcessing.CLUSTER_USER_NAME);
    sshAgent.setKey(ParameterProcessing.PEM_CERTIFICATE_NAME);
    sshAgent.setHost(cluster.getJobTracker().getDnsName());
    sshAgent.executeCommand(remoteCmd.toString());
    logger.info("Successfully installed FreeEed");
    // copy the settings to jobtracker
    Server jobTracker = cluster.getJobTracker();
    sshAgent.setHost(jobTracker.getDnsName());

    Settings s3Settings = Settings.getSettings().cloneForS3();
    String settingsFileToUse = "settings.properties.s3";
    Util.writeTextFile(settingsFileToUse, s3Settings.toString());

    logger.info("Copying settings file: {}", settingsFileToUse);
    // TODO change passing the settings to the cloud
    // sshAgent.scpTo(settingsFileToUse, "FreeEed/" + ParameterProcessing.DEFAULT_SETTINGS);
  }
    @Override
    public void run() {
      // Show a progress message on the EDT before the (slow) EC2 query starts.
      SwingUtilities.invokeLater(
          new Runnable() {

            @Override
            public void run() {
              clusterStatusText.setText("Querying EC2, please wait...");
            }
          });
      status = "Cluster status:\nUnknown";
      EC2Agent ec2 = new EC2Agent();
      try {
        status = ec2.describeRunningInstances();
        boolean allNodesRunning =
            ec2.getRunningInstances(true).size() == Settings.getSettings().getClusterSize();
        if (allNodesRunning) {
          status += "\n\n" + ec2.getClusterState();
        }
      } catch (Exception e) {
        // Best effort: keep whatever status we have so far and log the failure.
        e.printStackTrace(System.out);
      }
      // Publish the final status text back on the EDT.
      SwingUtilities.invokeLater(
          new Runnable() {

            @Override
            public void run() {
              clusterStatusText.setText(status);
            }
          });
    }
// Beispiel #4
// 0
  /**
   * Returns the configured Solr endpoint, normalized to end with a trailing slash.
   *
   * @return the endpoint URL, always "/"-terminated
   * @throws SolrException when no endpoint is configured
   */
  protected String getSolrEndpoint() throws SolrException {
    String endpoint = Settings.getSettings().getSolrEndpoint();
    if (endpoint == null || endpoint.isEmpty()) {
      throw new SolrException("Endpoint not configured");
    }
    return endpoint.endsWith("/") ? endpoint : endpoint + "/";
  }
  /**
   * Collects the per-map Lucene index zips from HDFS into a local temp directory,
   * unpacks each one, and merges it into this reducer's main Lucene index.
   *
   * @throws IOException if the temp directory cannot be listed or index merging fails
   */
  private void mergeLuceneIndex() throws IOException {
    String luceneDir = Settings.getSettings().getLuceneIndexDir();
    String hdfsLuceneDir =
        "/" + luceneDir + File.separator + Project.getProject().getProjectCode() + File.separator;

    String localLuceneTempDir = luceneDir + File.separator + "tmp" + File.separator;
    File localLuceneTempDirFile = new File(localLuceneTempDir);

    // Start from a clean temp directory so stale indexes are never merged.
    if (localLuceneTempDirFile.exists()) {
      Util.deleteDirectory(localLuceneTempDirFile);
    }

    // mkdirs (not mkdir) so a missing parent does not break the copy below
    localLuceneTempDirFile.mkdirs();

    // copy all zip lucene indexes, created by maps to local hd
    String cmd = "hadoop fs -copyToLocal " + hdfsLuceneDir + "* " + localLuceneTempDir;
    OsUtil.runCommand(cmd);

    // remove the map indexes as they are now copied to local
    String removeOldZips = "hadoop fs -rm " + hdfsLuceneDir + "*";
    OsUtil.runCommand(removeOldZips);

    logger.trace("Lucene index files collected to: {}", localLuceneTempDirFile.getAbsolutePath());

    String[] zipFilesArr = localLuceneTempDirFile.list();
    // File.list() returns null on an I/O error or if the directory does not exist;
    // fail with a descriptive error instead of an NPE in the loop below.
    if (zipFilesArr == null) {
      throw new IOException(
          "Cannot list Lucene temp directory: " + localLuceneTempDirFile.getAbsolutePath());
    }
    for (String indexZipFileStr : zipFilesArr) {
      String indexZipFileName = localLuceneTempDir + indexZipFileStr;
      String unzipToDir = localLuceneTempDir + indexZipFileStr.replace(".zip", "");

      ZipUtil.unzipFile(indexZipFileName, unzipToDir);
      File indexDir = new File(unzipToDir);

      FSDirectory fsDir = FSDirectory.open(indexDir);
      luceneIndex.addToIndex(fsDir);
    }
    // TODO check if we need to push the index to S3 or somewhere else
    luceneIndex.destroy();
  }
  /**
   * Launches the EC2 cluster in a background thread, polls until all instances are
   * up (or a configured timeout elapses), then kicks off the Hadoop cluster setup.
   */
  private void startCluster() {
    History.appendToHistory(
        "Starting a cluster of " + Settings.getSettings().getClusterSize() + " nodes.");
    // interval between cluster status polls, in milliseconds
    final int refreshMillis = 5000;

    new Thread(
            new Runnable() {

              @Override
              public void run() {
                try {
                  EC2Agent agent = new EC2Agent();
                  // MK debugging, do not start cluster when working an an image
                  if (!Settings.getSettings().skipInstanceCreation()) {
                    agent.launchInstances();
                  }

                  // cluster is given the certain number of minutes to start
                  int clusterTimeoutMin = Settings.getSettings().getClusterTimeoutMin();
                  // total number of polls that fit into the timeout window
                  int attempts = (clusterTimeoutMin * 60 * 1000) / refreshMillis;
                  int attempt = 0;
                  while (!allInstancesUp()) {
                    ++attempt;
                    History.appendToHistory("Check # " + attempt);
                    History.appendToHistory("Waiting for all instances to initialize...");
                    Thread.sleep(refreshMillis);
                    refreshStatus();
                    if (attempt > attempts) {
                      // timed out: notify the user on the EDT and abandon this startup attempt
                      SwingUtilities.invokeLater(
                          new Runnable() {

                            @Override
                            public void run() {
                              JOptionPane.showMessageDialog(
                                  null,
                                  "The cluster could not be started.\n"
                                      + "Please try again at a later time.");
                            }
                          });
                      return;
                    }
                  }
                } catch (final Exception e) {
                  // report the failure on the EDT; e must be (effectively) final to be captured
                  SwingUtilities.invokeLater(
                      new Runnable() {

                        @Override
                        public void run() {
                          e.printStackTrace(System.out);
                          JOptionPane.showMessageDialog(null, e.getMessage());
                        }
                      });
                }
                // NOTE(review): control falls through to the Hadoop setup below even when the
                // catch block above ran (only the timeout path returns early) — confirm that
                // proceeding after a launch/poll exception is intended.
                HadoopAgent.setHadoopReady(false);
                History.appendToHistory("Starting Hadoop cluster setup");
                HadoopAgent hadoopAgent = new HadoopAgent();
                hadoopAgent.setCallingUI(ClusterControlUI.this);
                hadoopAgent.setupAndStart();
              }
            })
        .start();
  }