  private void validateHadoopFS(List<ConfigIssue> issues) {
    boolean validHadoopFsUri = true;
    hadoopConf = getHadoopConfiguration(issues);
    String hdfsUriInConf;
    if (hdfsUri != null && !hdfsUri.isEmpty()) {
      hadoopConf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, hdfsUri);
    } else {
      hdfsUriInConf = hadoopConf.get(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY);
      if (hdfsUriInConf == null) {
        issues.add(
            getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsUri", Errors.HADOOPFS_19));
        return;
      } else {
        hdfsUri = hdfsUriInConf;
      }
    }
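    // hdfsUri is now non-empty: either set explicitly on the stage or taken from fs.defaultFS
    // in the loaded Hadoop configuration. Next, verify that it is a well-formed hdfs:// URI.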
    if (hdfsUri.contains("://")) {
      try {
        URI uri = new URI(hdfsUri);
        if (!"hdfs".equals(uri.getScheme())) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.HADOOP_FS.name(),
                      "hdfsUri",
                      Errors.HADOOPFS_12,
                      hdfsUri,
                      uri.getScheme()));
          validHadoopFsUri = false;
        } else if (uri.getAuthority() == null) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.HADOOP_FS.name(), "hdfsUri", Errors.HADOOPFS_13, hdfsUri));
          validHadoopFsUri = false;
        }
      } catch (Exception ex) {
        issues.add(
            getContext()
                .createConfigIssue(
                    Groups.HADOOP_FS.name(),
                    "hdfsUri",
                    Errors.HADOOPFS_22,
                    hdfsUri,
                    ex.getMessage(),
                    ex));
        validHadoopFsUri = false;
      }
    } else {
      issues.add(
          getContext()
              .createConfigIssue(Groups.HADOOP_FS.name(), "hdfsUri", Errors.HADOOPFS_02, hdfsUri));
      validHadoopFsUri = false;
    }

    StringBuilder logMessage = new StringBuilder();
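    // Resolve the login user: with Kerberos enabled the UGI is derived from the current JAAS
    // Subject, otherwise a simple login is performed for that Subject.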
    try {
      // forcing UGI to initialize with the security settings from the stage
      UserGroupInformation.setConfiguration(hadoopConf);
      Subject subject = Subject.getSubject(AccessController.getContext());
      if (UserGroupInformation.isSecurityEnabled()) {
        loginUgi = UserGroupInformation.getUGIFromSubject(subject);
      } else {
        UserGroupInformation.loginUserFromSubject(subject);
        loginUgi = UserGroupInformation.getLoginUser();
      }
      LOG.info(
          "Subject = {}, Principals = {}, Login UGI = {}",
          subject,
          subject == null ? "null" : subject.getPrincipals(),
          loginUgi);
      if (hdfsKerberos) {
        logMessage.append("Using Kerberos");
        if (loginUgi.getAuthenticationMethod()
            != UserGroupInformation.AuthenticationMethod.KERBEROS) {
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.HADOOP_FS.name(),
                      "hdfsKerberos",
                      Errors.HADOOPFS_00,
                      loginUgi.getAuthenticationMethod(),
                      UserGroupInformation.AuthenticationMethod.KERBEROS));
        }
      } else {
        logMessage.append("Using Simple");
        hadoopConf.set(
            CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION,
            UserGroupInformation.AuthenticationMethod.SIMPLE.name());
      }
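      // Smoke-test the connection as the resolved user: opening and closing the FileSystem here
      // surfaces configuration and permission problems at validation time rather than at runtime.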
      if (validHadoopFsUri) {
        getUGI()
            .doAs(
                new PrivilegedExceptionAction<Void>() {
                  @Override
                  public Void run() throws Exception {
                    try (FileSystem fs = getFileSystemForInitDestroy()) { // opened only to verify access; auto-closed
                    }
                    return null;
                  }
                });
      }
    } catch (Exception ex) {
      LOG.info("Error connecting to FileSystem: " + ex, ex);
      issues.add(
          getContext()
              .createConfigIssue(
                  Groups.HADOOP_FS.name(),
                  null,
                  Errors.HADOOPFS_11,
                  hdfsUri,
                  String.valueOf(ex),
                  ex));
    }
    LOG.info("Authentication Config: " + logMessage);
  }
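 // init() validates the Hadoop FS connection, wires the input directories and split settings
 // into the MapReduce FileInputFormat configuration, and then runs per-format validation.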
 @Override
 public List<ConfigIssue> init() {
   List<ConfigIssue> issues = super.init();
   validateHadoopFS(issues);
   // This determines the number of splits and, in turn, the number of executors.
   hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark
   hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark
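   // Explicit per-stage Hadoop properties are applied last so they override anything loaded above.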
   for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
     hadoopConf.set(config.getKey(), config.getValue());
   }
   List<Path> hdfsDirPaths = new ArrayList<>();
   if (hdfsDirLocations == null || hdfsDirLocations.isEmpty()) {
     issues.add(
         getContext()
             .createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_18));
   } else if (issues.isEmpty()) {
     for (String hdfsDirLocation : hdfsDirLocations) {
       try {
         FileSystem fs = getFileSystemForInitDestroy();
         Path ph = fs.makeQualified(new Path(hdfsDirLocation));
         hdfsDirPaths.add(ph);
         if (!fs.exists(ph)) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsDirLocations",
                       Errors.HADOOPFS_10,
                       hdfsDirLocation));
         } else if (!fs.getFileStatus(ph).isDirectory()) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsDirLocations",
                       Errors.HADOOPFS_15,
                       hdfsDirLocation));
         } else {
           try {
             FileStatus[] files = fs.listStatus(ph);
             if (files == null || files.length == 0) {
               issues.add(
                   getContext()
                       .createConfigIssue(
                           Groups.HADOOP_FS.name(),
                           "hdfsDirLocations",
                           Errors.HADOOPFS_16,
                           hdfsDirLocation));
             } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) {
               for (FileStatus fileStatus : files) {
                 if (fileStatus.isFile()) {
                   String path = fileStatus.getPath().toString();
                   try {
                     List<Map.Entry> buffer;
                     if (dataFormat == DataFormat.AVRO) {
                       buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE);
                     } else {
                       buffer = previewTextBatch(fileStatus, PREVIEW_SIZE);
                     }
                     for (int i = 0;
                         i < buffer.size() && previewBuffer.size() < PREVIEW_SIZE;
                         i++) {
                       Map.Entry entry = buffer.get(i);
                       previewBuffer.put(
                           String.valueOf(entry.getKey()),
                           entry.getValue() == null ? null : entry.getValue());
                     }
                   } catch (IOException | InterruptedException ex) {
                     String msg = "Error opening " + path + ": " + ex;
                     LOG.info(msg, ex);
                     issues.add(
                         getContext()
                             .createConfigIssue(
                                 Groups.HADOOP_FS.name(),
                                 "hdfsDirLocations",
                                 Errors.HADOOPFS_16,
                                 fileStatus.getPath()));
                   }
                 }
               }
             }
           } catch (IOException ex) {
             issues.add(
                 getContext()
                     .createConfigIssue(
                         Groups.HADOOP_FS.name(),
                         "hdfsDirLocations",
                         Errors.HADOOPFS_09,
                         hdfsDirLocation,
                         ex.toString(),
                         ex));
           }
         }
       } catch (IOException ioe) {
         LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe);
         issues.add(
             getContext()
                 .createConfigIssue(
                     Groups.HADOOP_FS.name(),
                     "hdfsDirLocations",
                     Errors.HADOOPFS_11,
                     hdfsDirLocation,
                     ioe.toString(),
                     ioe));
       }
     }
   }
   hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ","));
   hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(recursive));
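   // Per-format sanity checks before the parser factory configuration is validated.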
   switch (dataFormat) {
     case JSON:
       if (jsonMaxObjectLen < 1) {
         issues.add(
             getContext()
                 .createConfigIssue(Groups.JSON.name(), "jsonMaxObjectLen", Errors.HADOOPFS_04));
       }
       break;
     case TEXT:
       if (textMaxLineLen < 1) {
         issues.add(
             getContext()
                 .createConfigIssue(Groups.TEXT.name(), "textMaxLineLen", Errors.HADOOPFS_05));
       }
       break;
     case LOG:
       logDataFormatValidator =
           new LogDataFormatValidator(
               logMode,
               logMaxObjectLen,
               retainOriginalLine,
               customLogFormat,
               regex,
               grokPatternDefinition,
               grokPattern,
               enableLog4jCustomLogFormat,
               log4jCustomLogFormat,
               OnParseError.ERROR,
               0,
               Groups.LOG.name(),
               getFieldPathToGroupMap(fieldPathsToGroupName));
       logDataFormatValidator.validateLogFormatConfig(issues, getContext());
       break;
     case DELIMITED:
       if (csvMaxObjectLen < 1) {
         issues.add(
             getContext()
                 .createConfigIssue(
                     Groups.DELIMITED.name(), "csvMaxObjectLen", Errors.HADOOPFS_30));
       }
       break;
     case AVRO:
       if (avroSchema != null && !avroSchema.isEmpty()) {
         hadoopConf.set(AvroJob.INPUT_SCHEMA, avroSchema);
         hadoopConf.set(CONF_INPUT_KEY_SCHEMA, avroSchema);
       }
       break;
     default:
       issues.add(
           getContext()
               .createConfigIssue(
                   Groups.LOG.name(), "dataFormat", Errors.HADOOPFS_06, dataFormat));
   }
   validateParserFactoryConfigs(issues);
   LOG.info("Issues: " + issues);
   return issues;
 }
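 // Builds the Hadoop Configuration used by this source: Kerberos settings, the *-site.xml files
 // from the (relative) Hadoop config directory, and finally any explicit per-stage overrides.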
 Configuration getHadoopConfiguration(List<ConfigIssue> issues) {
   Configuration conf = new Configuration();
   if (hdfsKerberos) {
     conf.set(
         CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION,
         UserGroupInformation.AuthenticationMethod.KERBEROS.name());
     try {
       conf.set(
           DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY,
           "hdfs/_HOST@" + KerberosUtil.getDefaultRealm());
     } catch (Exception ex) {
       if (!hdfsConfigs.containsKey(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY)) {
         issues.add(
             getContext()
                 .createConfigIssue(
                     Groups.HADOOP_FS.name(), null, Errors.HADOOPFS_28, ex.getMessage()));
       }
     }
   }
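    // The Hadoop config directory must be relative to the stage resources directory; the standard
    // core-, hdfs-, yarn- and mapred-site.xml files found there are added as configuration resources.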
   if (hadoopConfDir != null && !hadoopConfDir.isEmpty()) {
     File hadoopConfigDir = new File(hadoopConfDir);
     if (hadoopConfigDir.isAbsolute()) {
       // Do not allow absolute hadoop config directory in cluster mode
       issues.add(
           getContext()
               .createConfigIssue(
                   Groups.HADOOP_FS.name(), "hadoopConfDir", Errors.HADOOPFS_29, hadoopConfDir));
     } else {
       hadoopConfigDir =
           new File(getContext().getResourcesDirectory(), hadoopConfDir).getAbsoluteFile();
     }
     if (!hadoopConfigDir.exists()) {
       issues.add(
           getContext()
               .createConfigIssue(
                   Groups.HADOOP_FS.name(),
                   "hdfsConfDir",
                   Errors.HADOOPFS_25,
                   hadoopConfigDir.getPath()));
     } else if (!hadoopConfigDir.isDirectory()) {
       issues.add(
           getContext()
               .createConfigIssue(
                   Groups.HADOOP_FS.name(),
                   "hdfsConfDir",
                   Errors.HADOOPFS_26,
                   hadoopConfigDir.getPath()));
     } else {
       File coreSite = new File(hadoopConfigDir, "core-site.xml");
       if (coreSite.exists()) {
         if (!coreSite.isFile()) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsConfDir",
                       Errors.HADOOPFS_27,
                       coreSite.getPath()));
         }
         conf.addResource(new Path(coreSite.getAbsolutePath()));
       }
       File hdfsSite = new File(hadoopConfigDir, "hdfs-site.xml");
       if (hdfsSite.exists()) {
         if (!hdfsSite.isFile()) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsConfDir",
                       Errors.HADOOPFS_27,
                       hdfsSite.getPath()));
         }
         conf.addResource(new Path(hdfsSite.getAbsolutePath()));
       }
       File yarnSite = new File(hadoopConfigDir, "yarn-site.xml");
       if (yarnSite.exists()) {
         if (!yarnSite.isFile()) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsConfDir",
                       Errors.HADOOPFS_27,
                       yarnSite.getPath()));
         }
         conf.addResource(new Path(yarnSite.getAbsolutePath()));
       }
       File mapredSite = new File(hadoopConfigDir, "mapred-site.xml");
       if (mapredSite.exists()) {
         if (!mapredSite.isFile()) {
           issues.add(
               getContext()
                   .createConfigIssue(
                       Groups.HADOOP_FS.name(),
                       "hdfsConfDir",
                       Errors.HADOOPFS_27,
                       mapredSite.getPath()));
         }
         conf.addResource(new Path(mapredSite.getAbsolutePath()));
       }
     }
   }
   for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
     conf.set(config.getKey(), config.getValue());
   }
   return conf;
 }
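  // Config-bean-based variant of init(): settings are read from the ClusterHdfsConfigBean (conf)
  // rather than from flat stage fields, and the data format configuration bean is initialized directly.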
  @Override
  public List<ConfigIssue> init() {
    List<ConfigIssue> issues = super.init();
    validateHadoopFS(issues);
    // This determines the number of splits and, in turn, the number of executors.
    hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark
    hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark
    for (Map.Entry<String, String> config : conf.hdfsConfigs.entrySet()) {
      hadoopConf.set(config.getKey(), config.getValue());
    }
    List<Path> hdfsDirPaths = new ArrayList<>();
    if (conf.hdfsDirLocations == null || conf.hdfsDirLocations.isEmpty()) {
      issues.add(
          getContext()
              .createConfigIssue(
                  Groups.HADOOP_FS.name(),
                  ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX + "hdfsDirLocations",
                  Errors.HADOOPFS_18));
    } else if (issues.isEmpty()) {
      for (String hdfsDirLocation : conf.hdfsDirLocations) {
        try {
          FileSystem fs = getFileSystemForInitDestroy();
          Path ph = fs.makeQualified(new Path(hdfsDirLocation));
          hdfsDirPaths.add(ph);
          if (!fs.exists(ph)) {
            issues.add(
                getContext()
                    .createConfigIssue(
                        Groups.HADOOP_FS.name(),
                        ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX + "hdfsDirLocations",
                        Errors.HADOOPFS_10,
                        hdfsDirLocation));
          } else if (!fs.getFileStatus(ph).isDirectory()) {
            issues.add(
                getContext()
                    .createConfigIssue(
                        Groups.HADOOP_FS.name(),
                        ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX + "hdfsDirLocations",
                        Errors.HADOOPFS_15,
                        hdfsDirLocation));
          } else {
            try {
              FileStatus[] files = fs.listStatus(ph);
              if (files == null || files.length == 0) {
                issues.add(
                    getContext()
                        .createConfigIssue(
                            Groups.HADOOP_FS.name(),
                            ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX
                                + "hdfsDirLocations",
                            Errors.HADOOPFS_16,
                            hdfsDirLocation));
              } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) {
                for (FileStatus fileStatus : files) {
                  if (fileStatus.isFile()) {
                    String path = fileStatus.getPath().toString();
                    try {
                      List<Map.Entry> buffer;
                      if (conf.dataFormat == DataFormat.AVRO) {
                        buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE);
                      } else {
                        buffer = previewTextBatch(fileStatus, PREVIEW_SIZE);
                      }
                      for (int i = 0;
                          i < buffer.size() && previewBuffer.size() < PREVIEW_SIZE;
                          i++) {
                        Map.Entry entry = buffer.get(i);
                        previewBuffer.put(
                            String.valueOf(entry.getKey()),
                            entry.getValue() == null ? null : entry.getValue());
                      }
                    } catch (IOException | InterruptedException ex) {
                      String msg = "Error opening " + path + ": " + ex;
                      LOG.info(msg, ex);
                      issues.add(
                          getContext()
                              .createConfigIssue(
                                  Groups.HADOOP_FS.name(),
                                  ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX
                                      + "hdfsDirLocations",
                                  Errors.HADOOPFS_16,
                                  fileStatus.getPath()));
                    }
                  }
                }
              }
            } catch (IOException ex) {
              issues.add(
                  getContext()
                      .createConfigIssue(
                          Groups.HADOOP_FS.name(),
                          ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX
                              + "hdfsDirLocations",
                          Errors.HADOOPFS_09,
                          hdfsDirLocation,
                          ex.toString(),
                          ex));
            }
          }
        } catch (IOException ioe) {
          LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe);
          issues.add(
              getContext()
                  .createConfigIssue(
                      Groups.HADOOP_FS.name(),
                      ClusterHdfsConfigBean.CLUSTER_HDFS_CONFIG_BEAN_PREFIX + "hdfsDirLocations",
                      Errors.HADOOPFS_11,
                      hdfsDirLocation,
                      ioe.toString(),
                      ioe));
        }
      }
    }
    hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ","));
    hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(conf.recursive));

    // CsvHeader.IGNORE_HEADER must be overridden to CsvHeader.NO_HEADER before building the
    // parser, but it must be restored to the original value for the produce() method.
    CsvHeader originalCsvHeader = conf.dataFormatConfig.csvHeader;
    if (originalCsvHeader == CsvHeader.IGNORE_HEADER) {
      conf.dataFormatConfig.csvHeader = CsvHeader.NO_HEADER;
    }
    conf.dataFormatConfig.init(
        getContext(),
        conf.dataFormat,
        Groups.HADOOP_FS.name(),
        DATA_FROMAT_CONFIG_BEAN_PREFIX,
        issues);
    conf.dataFormatConfig.csvHeader = originalCsvHeader;

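    // Build the record parser factory from the fully initialized data format configuration.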
    parserFactory = conf.dataFormatConfig.getParserFactory();

    LOG.info("Issues: " + issues);
    return issues;
  }