Example #1
 public static void validateExpression(
     ELEval elEvaluator,
     ELVars variables,
     String expression,
     Stage.Context context,
     String group,
     String config,
     ErrorCode err,
     Class<?> type,
     List<Stage.ConfigIssue> issues) {
   RecordEL.setRecordInContext(variables, context.createRecord("forValidation"));
   try {
     context.parseEL(expression);
     elEvaluator.eval(variables, expression, type);
   } catch (Exception ex) {
     issues.add(context.createConfigIssue(group, config, err, expression, ex.toString(), ex));
   }
 }
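A minimal usage sketch for the helper above, assuming it is called from the same utility class (or statically imported). The config group "EXPRESSION", config name "expression", the sample EL, and the Errors.EXPR_00 code are illustrative, not part of the original API:

 // Inside a stage's init(): validate a user-entered EL expression up front so
 // a bad expression becomes a ConfigIssue instead of a runtime failure.
 public List<Stage.ConfigIssue> validateConfigs(Stage.Context context) {
   List<Stage.ConfigIssue> issues = new ArrayList<>();
   ELVars vars = context.createELVars();
   validateExpression(
       context.createELEval("expression"), // ELEval bound to the "expression" config
       vars,
       "${record:value('/name')}",         // illustrative expression
       context,
       "EXPRESSION",                       // illustrative config group
       "expression",                       // illustrative config name
       Errors.EXPR_00,                     // illustrative error code
       String.class,
       issues);
   return issues;
 }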
Example #2
 public static ELVars parseConstants(
     Map<String, ?> constants,
     Stage.Context context,
     String group,
     String config,
     ErrorCode err,
     List<Stage.ConfigIssue> issues) {
   ELVars variables = context.createELVars();
   if (constants != null) {
     for (Map.Entry<String, ?> entry : constants.entrySet()) {
       try {
         variables.addVariable(entry.getKey(), entry.getValue());
       } catch (Exception ex) {
         issues.add(context.createConfigIssue(group, config, err, constants, ex.toString(), ex));
       }
     }
   }
   return variables;
 }
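A short sketch pairing parseConstants with validateExpression from Example #1; the constant, the group/config names, and the error codes are illustrative:

 // Seed ELVars with pipeline constants, then validate an expression that uses them.
 Map<String, Object> constants = new HashMap<>();
 constants.put("MAX_SIZE", 1024);
 ELVars vars = parseConstants(constants, context, "EL", "constants", Errors.EL_01, issues);
 validateExpression(
     context.createELEval("condition"), vars, "${MAX_SIZE > 0}",
     context, "EL", "condition", Errors.EL_02, Boolean.class, issues);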
Example #3
 private void validateConnection(Stage.Context context, List<Stage.ConfigIssue> issues) {
    // The access key ID acts as the username (unique within AWS) and the
    // secret access key as the password.
   AWSCredentials credentials = new BasicAWSCredentials(accessKeyId, secretAccessKey);
   s3Client = new AmazonS3Client(credentials, new ClientConfiguration());
   s3Client.setS3ClientOptions(new S3ClientOptions().withPathStyleAccess(true));
   if (endPoint != null && !endPoint.isEmpty()) {
     s3Client.setEndpoint(endPoint);
   } else {
     s3Client.setRegion(Region.getRegion(region));
   }
   try {
      // verify the credentials are valid by attempting to list buckets
     s3Client.listBuckets();
   } catch (AmazonS3Exception e) {
     issues.add(
         context.createConfigIssue(
             Groups.S3.name(), "accessKeyId", Errors.S3_SPOOLDIR_20, e.toString()));
   }
 }
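A sketch of wiring validateConnection into the stage lifecycle, assuming the surrounding class extends one of the SDC base stages; issues collected here surface as configuration errors and keep the pipeline from starting:

 @Override
 protected List<ConfigIssue> init() {
   List<ConfigIssue> issues = super.init();
   validateConnection(getContext(), issues);
   return issues;
 }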
Example #4
  /**
   * Returns a protobuf descriptor instance from the provided descriptor file.
   *
   * @param context Stage context used for finding the SDC resources directory
   * @param protoDescriptorFile Path to descriptor file relative to SDC_RESOURCES
   * @param messageType The name of the message to decode
   * @param messageTypeToExtensionMap Map of protobuf extensions required for decoding
   * @param defaultValueMap Map of default values to use for the message
   * @return protobuf descriptor instance
    * @throws StageException if the descriptor file cannot be found or parsed
   */
  public static Descriptors.Descriptor getDescriptor(
      Stage.Context context,
      String protoDescriptorFile,
      String messageType,
      Map<String, Set<Descriptors.FieldDescriptor>> messageTypeToExtensionMap,
      Map<String, Object> defaultValueMap)
      throws StageException {
    File descriptorFileHandle = new File(context.getResourcesDirectory(), protoDescriptorFile);
     try (FileInputStream fin = new FileInputStream(descriptorFileHandle)) {
       DescriptorProtos.FileDescriptorSet set = DescriptorProtos.FileDescriptorSet.parseFrom(fin);

       // Iterate over the file descriptor set computed above and cache the
       // dependencies and all encountered file descriptors.

       // This map holds all the dependencies that a given file descriptor has;
       // the cached map is looked up while building FileDescriptor instances.
      Map<String, Set<Descriptors.FileDescriptor>> fileDescriptorDependentsMap = new HashMap<>();
      // All encountered FileDescriptor instances cached based on their name.
      Map<String, Descriptors.FileDescriptor> fileDescriptorMap = new HashMap<>();
      ProtobufTypeUtil.getAllFileDescriptors(set, fileDescriptorDependentsMap, fileDescriptorMap);

      // Get the descriptor for the expected message type
      Descriptors.Descriptor descriptor =
          ProtobufTypeUtil.getDescriptor(set, fileDescriptorMap, protoDescriptorFile, messageType);

      // Compute and cache all extensions defined for each message type
      ProtobufTypeUtil.populateDefaultsAndExtensions(
          fileDescriptorMap, messageTypeToExtensionMap, defaultValueMap);
      return descriptor;
    } catch (FileNotFoundException e) {
      throw new StageException(Errors.PROTOBUF_06, descriptorFileHandle.getAbsolutePath(), e);
    } catch (IOException e) {
      throw new StageException(Errors.PROTOBUF_08, e.toString(), e);
    }
  }
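A usage sketch for getDescriptor; the descriptor file and message names are illustrative, and the two maps are the caches the method populates:

 Map<String, Set<Descriptors.FieldDescriptor>> extensionMap = new HashMap<>();
 Map<String, Object> defaultValueMap = new HashMap<>();
 try {
   Descriptors.Descriptor descriptor =
       getDescriptor(context, "Employee.desc", "util.Employee", extensionMap, defaultValueMap);
   // descriptor, extensionMap and defaultValueMap can now drive message decoding
 } catch (StageException e) {
   issues.add(context.createConfigIssue(
       DataFormatGroups.PROTOBUF.name(), "protoDescriptorFile", e.getErrorCode(), e.toString()));
 }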
Example #5
  public void init(Stage.Context context, String prefix, List<Stage.ConfigIssue> issues) {
    conf = new Configuration();
    conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);

    if (hdfsKerberos) {
      conf.set(
          CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION,
          UserGroupInformation.AuthenticationMethod.KERBEROS.name());
      try {
        conf.set(
            DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY,
            "hdfs/_HOST@" + HadoopSecurityUtil.getDefaultRealm());
      } catch (Exception ex) {
        if (!hdfsConfigs.containsKey(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY)) {
          issues.add(
              context.createConfigIssue(
                  Groups.HDFS.name(), null, HdfsMetadataErrors.HDFS_METADATA_001, ex.toString()));
        }
      }
    }

    if (hdfsConfDir != null && !hdfsConfDir.isEmpty()) {
      File hadoopConfigDir = new File(hdfsConfDir);
      if (!hadoopConfigDir.isAbsolute()) {
        hadoopConfigDir = new File(context.getResourcesDirectory(), hdfsConfDir).getAbsoluteFile();
      }
      if (!hadoopConfigDir.exists()) {
        issues.add(
            context.createConfigIssue(
                Groups.HDFS.name(),
                JOIN.join(prefix, "hdfsConfDir"),
                HdfsMetadataErrors.HDFS_METADATA_002,
                hadoopConfigDir.getPath()));
      } else if (!hadoopConfigDir.isDirectory()) {
        issues.add(
            context.createConfigIssue(
                Groups.HDFS.name(),
                JOIN.join(prefix, "hdfsConfDir"),
                HdfsMetadataErrors.HDFS_METADATA_003,
                hadoopConfigDir.getPath()));
      } else {
        File coreSite = new File(hadoopConfigDir, "core-site.xml");
        if (coreSite.exists()) {
          if (!coreSite.isFile()) {
            issues.add(
                context.createConfigIssue(
                    Groups.HDFS.name(),
                    JOIN.join(prefix, "hdfsConfDir"),
                    HdfsMetadataErrors.HDFS_METADATA_004,
                    coreSite.getPath()));
          }
          conf.addResource(new Path(coreSite.getAbsolutePath()));
        }
        File hdfsSite = new File(hadoopConfigDir, "hdfs-site.xml");
        if (hdfsSite.exists()) {
          if (!hdfsSite.isFile()) {
            issues.add(
                context.createConfigIssue(
                    Groups.HDFS.name(),
                    JOIN.join(prefix, "hdfsConfDir"),
                    HdfsMetadataErrors.HDFS_METADATA_004,
                    hdfsSite.getPath()));
          }
          conf.addResource(new Path(hdfsSite.getAbsolutePath()));
        }
      }
    }

    // Unless the user specified a non-empty HDFS URI, retrieve its value from the loaded configuration
    if (StringUtils.isEmpty(hdfsUri)) {
      hdfsUri = conf.get("fs.defaultFS");
    }

    for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
      conf.set(config.getKey(), config.getValue());
    }

    try {
      loginUgi = HadoopSecurityUtil.getLoginUser(conf);
    } catch (IOException e) {
      LOG.error("Can't create login UGI", e);
      issues.add(
          context.createConfigIssue(
              Groups.HDFS.name(), null, HdfsMetadataErrors.HDFS_METADATA_005, e.getMessage(), e));
    }

    if (!issues.isEmpty()) {
      return;
    }

    try {
      fs =
          getUGI()
              .doAs(
                  new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws Exception {
                      return FileSystem.newInstance(new URI(hdfsUri), conf);
                    }
                  });
    } catch (Exception ex) {
      LOG.error("Can't retrieve FileSystem instance", ex);
      issues.add(
          context.createConfigIssue(
              Groups.HDFS.name(), null, HdfsMetadataErrors.HDFS_METADATA_005, ex.getMessage(), ex));
    }
  }
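Once init() succeeds, subsequent HDFS calls should run under the same UGI so the Kerberos login applies consistently. A minimal sketch of that pattern (the helper name and surrounding class are assumed):

 public boolean pathExists(final String path) throws IOException, InterruptedException {
   return getUGI().doAs(new PrivilegedExceptionAction<Boolean>() {
     @Override
     public Boolean run() throws Exception {
       return fs.exists(new Path(path));
     }
   });
 }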
Example #6
  public boolean init(
      Stage.Context context,
      DataFormat dataFormat,
      String groupName,
      String configName,
      List<Stage.ConfigIssue> issues) {
    boolean valid = true;
    switch (dataFormat) {
      case TEXT:
        // the text field path configuration is required; it defaults to "/"
        if (textFieldPath == null || textFieldPath.isEmpty()) {
          issues.add(
              context.createConfigIssue(
                  DataFormatGroups.TEXT.name(), "fieldPath", DataFormatErrors.DATA_FORMAT_200));
          valid = false;
        }
        break;
      case BINARY:
        // the binary field path configuration is required; it defaults to "/"
        if (binaryFieldPath == null || binaryFieldPath.isEmpty()) {
          issues.add(
              context.createConfigIssue(
                  DataFormatGroups.BINARY.name(), "fieldPath", DataFormatErrors.DATA_FORMAT_200));
          valid = false;
        }
        break;
      case JSON:
      case DELIMITED:
      case SDC_JSON:
      case AVRO:
        // no-op
        break;
      case PROTOBUF:
        if (protoDescriptorFile == null || protoDescriptorFile.isEmpty()) {
          issues.add(
              context.createConfigIssue(
                  DataFormatGroups.PROTOBUF.name(),
                  "protoDescriptorFile",
                  DataFormatErrors.DATA_FORMAT_07));
          valid = false;
        } else {
          File file = new File(context.getResourcesDirectory(), protoDescriptorFile);
          if (!file.exists()) {
            issues.add(
                context.createConfigIssue(
                    DataFormatGroups.PROTOBUF.name(),
                    "protoDescriptorFile",
                    DataFormatErrors.DATA_FORMAT_09,
                    file.getAbsolutePath()));
            valid = false;
          }
          if (messageType == null || messageType.isEmpty()) {
            issues.add(
                context.createConfigIssue(
                    DataFormatGroups.PROTOBUF.name(),
                    "messageType",
                    DataFormatErrors.DATA_FORMAT_08));
            valid = false;
          }
        }
        break;
      default:
        issues.add(
            context.createConfigIssue(
                groupName, configName, DataFormatErrors.DATA_FORMAT_04, dataFormat));
        valid = false;
    }

    valid &= validateDataGenerator(context, dataFormat, groupName, issues);

    return valid;
  }
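A sketch of a stage delegating its data format validation to this init method; the config bean field, the Groups enum value, and the config name are illustrative:

 @Override
 protected List<ConfigIssue> init() {
   List<ConfigIssue> issues = super.init();
   dataGeneratorFormatConfig.init(
       getContext(), dataFormat, Groups.DATA_FORMAT.name(), "dataFormat", issues);
   return issues;
 }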
  private boolean validateDataGenerator(
      Stage.Context context,
      DataFormat dataFormat,
      String groupName,
      List<Stage.ConfigIssue> issues) {
    boolean valid = true;

    DataGeneratorFactoryBuilder builder =
        new DataGeneratorFactoryBuilder(context, dataFormat.getGeneratorFormat());
    if (charset == null || charset.trim().isEmpty()) {
      charset = CHARSET_UTF8;
    }

    Charset cSet;
    try {
      cSet = Charset.forName(charset);
    } catch (UnsupportedCharsetException ex) {
      // fall back to a valid charset so the generator factory can still be
      // configured and checked for further errors
      cSet = StandardCharsets.UTF_8;
      issues.add(
          context.createConfigIssue(
              groupName, "charset", DataFormatErrors.DATA_FORMAT_05, charset));
      valid = false;
    }

    builder.setCharset(cSet);

    switch (dataFormat) {
      case SDC_JSON:
        break;
      case DELIMITED:
        builder.setMode(csvFileFormat);
        builder.setMode(csvHeader);
        builder.setConfig(DelimitedDataGeneratorFactory.REPLACE_NEWLINES_KEY, csvReplaceNewLines);
        builder.setConfig(DelimitedDataConstants.DELIMITER_CONFIG, csvCustomDelimiter);
        builder.setConfig(DelimitedDataConstants.ESCAPE_CONFIG, csvCustomEscape);
        builder.setConfig(DelimitedDataConstants.QUOTE_CONFIG, csvCustomQuote);
        break;
      case TEXT:
        builder.setConfig(TextDataGeneratorFactory.FIELD_PATH_KEY, textFieldPath);
        builder.setConfig(TextDataGeneratorFactory.EMPTY_LINE_IF_NULL_KEY, textEmptyLineIfNull);
        break;
      case JSON:
        builder.setMode(jsonMode);
        break;
      case AVRO:
        Schema schema = null;
        Map<String, Object> defaultValues = new HashMap<>();
        try {
          schema =
              new Schema.Parser().setValidate(true).setValidateDefaults(true).parse(avroSchema);
        } catch (Exception e) {
          issues.add(
              context.createConfigIssue(
                  DataFormatGroups.AVRO.name(),
                  "avroSchema",
                  DataFormatErrors.DATA_FORMAT_300,
                  e.toString(),
                  e));
          valid = false;
        }
        if (schema != null) {
          try {
            defaultValues.putAll(
                AvroTypeUtil.getDefaultValuesFromSchema(schema, new HashSet<String>()));
          } catch (IOException e) {
            issues.add(
                context.createConfigIssue(
                    DataFormatGroups.AVRO.name(),
                    "avroSchema",
                    DataFormatErrors.DATA_FORMAT_301,
                    e.toString(),
                    e));
            valid = false;
          }
        }
        builder.setConfig(AvroDataGeneratorFactory.SCHEMA_KEY, avroSchema);
        builder.setConfig(AvroDataGeneratorFactory.INCLUDE_SCHEMA_KEY, includeSchema);
        builder.setConfig(AvroDataGeneratorFactory.DEFAULT_VALUES_KEY, defaultValues);
        break;
      case BINARY:
        builder.setConfig(BinaryDataGeneratorFactory.FIELD_PATH_KEY, binaryFieldPath);
        break;
      case PROTOBUF:
        builder
            .setConfig(ProtobufConstants.PROTO_DESCRIPTOR_FILE_KEY, protoDescriptorFile)
            .setConfig(ProtobufConstants.MESSAGE_TYPE_KEY, messageType);
        break;
      default:
        // no action needed
        break;
    }
    if (valid) {
      try {
        dataGeneratorFactory = builder.build();
      } catch (Exception ex) {
        issues.add(
            context.createConfigIssue(
                null, null, DataFormatErrors.DATA_FORMAT_201, ex.toString(), ex));
        valid = false;
      }
    }
    return valid;
  }
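After validation succeeds, the cached factory produces one DataGenerator per output stream. A minimal write-path sketch (the method and its stream handling are illustrative):

 public void write(Record record, OutputStream out) throws IOException, DataGeneratorException {
   DataGenerator generator = dataGeneratorFactory.getGenerator(out);
   try {
     generator.write(record);
     generator.flush();
   } finally {
     generator.close();
   }
 }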