Ejemplo n.º 1
0
  /**
   * Locates an already existing ARFF header file and loads it as the final header.
   *
   * <p>The configured path (after best-effort environment variable substitution) is looked up on
   * the local file system first. If it exists there, the file is copied into HDFS under
   * {@code HDFSUtils.WEKA_TEMP_DISTRIBUTED_CACHE_FILES} using the file's own name. Otherwise the
   * path is treated as an HDFS path. In both cases {@code m_hdfsPathToAggregatedHeader} is set to
   * the HDFS location and the header is read via {@code getFinalHeaderFromHDFS}.
   *
   * @throws DistributedWekaException if no path has been specified, if the file cannot be found on
   *     either the local file system or in HDFS, or if there is a problem transferring it into (or
   *     reading it from) HDFS
   */
  protected void handleExistingHeaderFile() throws DistributedWekaException {

    String existingPath = getPathToExistingHeader();

    // Fail with the declared exception type rather than letting a null/blank path
    // surface later as a NullPointerException from the File constructor.
    if (existingPath == null || existingPath.trim().isEmpty()) {
      throw new DistributedWekaException(
          "No path to an existing ARFF header file has been specified");
    }

    try {
      existingPath = environmentSubstitute(existingPath);
    } catch (Exception ignored) {
      // Best effort: if substitution fails, carry on with the path exactly as
      // configured. A genuinely bad path is reported by the "unable to find" error below.
    }

    boolean success = false;
    try {
      // check local file system first
      File localFile = new File(existingPath);
      if (localFile.exists()) {
        // copy this file into HDFS
        String hdfsDest = HDFSUtils.WEKA_TEMP_DISTRIBUTED_CACHE_FILES + localFile.getName();
        HDFSUtils.copyToHDFS(existingPath, hdfsDest, m_mrConfig.getHDFSConfig(), m_env, true);

        m_hdfsPathToAggregatedHeader = hdfsDest;
        Configuration conf = new Configuration();
        m_mrConfig.getHDFSConfig().configureForHadoop(conf, m_env);
        getFinalHeaderFromHDFS(conf, hdfsDest);
        success = true;
      } else {
        // not a local file - see whether the path exists in HDFS as-is
        Path hdfsPath = new Path(existingPath);
        Configuration conf = new Configuration();
        m_mrConfig.getHDFSConfig().configureForHadoop(conf, m_env);
        // NOTE(review): the FileSystem is deliberately not closed here; Hadoop's
        // FileSystem.get() normally returns a cached, shared instance - confirm
        // caching is not disabled in this deployment.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(hdfsPath)) {
          m_hdfsPathToAggregatedHeader = existingPath;
          getFinalHeaderFromHDFS(conf, existingPath);
          success = true;
        }
      }
    } catch (IOException e) {
      throw new DistributedWekaException(e);
    }

    if (!success) {
      throw new DistributedWekaException(
          "Was unable to find '"
              + existingPath
              + "' on either the local file system or in HDFS");
    }
  }