/**
 * Downloads the source CSV from S3, runs it through {@code updateDataset} and uploads the result.
 *
 * <p>Steps, in order:
 *
 * <ol>
 *   <li>Fetch the object stored under the prefix that {@code AwsDataLoader} reports for {@code
 *       source}.
 *   <li>Bind every row to a {@code ProductLanguage} by column position and store the rows in
 *       {@code list}.
 *   <li>Hand {@code list} to {@code updateDataset}.
 *   <li>Serialise {@code list} back to CSV, drop the first line of that output and upload the
 *       remainder to {@code PREFIX + OUTPUT_KEY + source + ".csv"}.
 * </ol>
 *
 * <p>Text is decoded and encoded as UTF-8 on every step so the result does not depend on the
 * platform default charset.
 *
 * @param client S3 client used for the download and the upload; its endpoint is set to {@code
 *     S3_ENDPOINT}
 * @throws Exception if the input cannot be read or parsed, or the output cannot be serialised
 */
  public void cleanseData(AmazonS3 client) throws Exception {
    AwsDataLoader loader = new AwsDataLoader();
    String prefix = loader.getS3Prefix(source);
    client.setEndpoint(S3_ENDPOINT);

    // The same positional mapping is used for reading the input and writing the output.
    ColumnPositionMappingStrategy<ProductLanguage> strat = new ColumnPositionMappingStrategy<>();
    strat.setType(ProductLanguage.class);
    strat.setColumnMapping(
        new String[] {"refId", "code", "name", "locale", "state", "format", "displayNameLanguage"});

    S3Object object = client.getObject(new GetObjectRequest(BUCKET_NM, prefix));
    // Closing the content stream releases the underlying HTTP connection back to the client.
    try (InputStream content = object.getObjectContent();
        CSVReader reader =
            new CSVReader(
                new BufferedReader(new InputStreamReader(content, StandardCharsets.UTF_8)),
                CSVParser.DEFAULT_SEPARATOR,
                CSVParser.DEFAULT_QUOTE_CHARACTER,
                CSVParser.DEFAULT_ESCAPE_CHARACTER,
                HEADERS_LINE)) {
      CsvToBean<ProductLanguage> csv = new CsvToBean<>();
      list = csv.parse(strat, reader);
    }

    log.info("ProductLanguageCleanser input size: " + list.size());

    this.updateDataset(list);

    BeanToCsv<ProductLanguage> csvWriter = new BeanToCsv<>();
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    log.info("List size: " + list.size());
    // Closing the writer flushes everything into the in-memory buffer.
    try (CSVWriter writer =
        new CSVWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8), ',', '"')) {
      csvWriter.write(strat, writer, list);
    }

    // The uploaded object must not contain the first line of the generated CSV.
    String dataset = new String(os.toByteArray(), StandardCharsets.UTF_8);
    byte[] payload = dropFirstLine(dataset).getBytes(StandardCharsets.UTF_8);

    String outPrefix = PREFIX + OUTPUT_KEY + source + ".csv";
    ObjectMetadata omd = new ObjectMetadata();
    // Declaring the length up front lets the client stream the body instead of buffering it.
    omd.setContentLength(payload.length);

    try (InputStream body = new ByteArrayInputStream(payload)) {
      client.putObject(BUCKET_NM, outPrefix, body, omd);
    } catch (IOException e) {
      // Best effort: an I/O failure on the upload stream is logged (with its cause), not rethrown.
      log.error("Axon data write to s3 failed: " + e.getMessage(), e);
    }
  }

  /**
   * Returns {@code text} without its first line.
   *
   * <p>A line ends at {@code "\n"}, {@code "\r"} or {@code "\r\n"}, the same terminators {@link
   * BufferedReader#readLine()} recognises. If {@code text} contains no terminator the whole text
   * is the first line and the result is empty.
   */
  private static String dropFirstLine(String text) {
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (c == '\n') {
        return text.substring(i + 1);
      }
      if (c == '\r') {
        int next = i + 1;
        if (next < text.length() && text.charAt(next) == '\n') {
          next++;
        }
        return text.substring(next);
      }
    }
    return "";
  }
// Example #2
// 0
 /**
  * Parses a CSV file into a list of beans using the OpenCSV library.
  *
  * <p>NOTE(review): this overload never tells the mapping strategy which bean class to create
  * ({@code T} is erased at runtime). OpenCSV strategies normally need {@code setType} before they
  * can instantiate beans, so prefer {@link #parseCSVFileAsList(Class, Map, File)} - confirm
  * against the OpenCSV version in use.
  *
  * @param columnMapping maps CSV header names to bean field names, e.g. {@code "ID" -> "id"}.
  * @param fileInputCsv the CSV file to parse.
  * @param <T> the bean type.
  * @return the beans parsed from the CSV file, or an empty list if the file cannot be read.
  */
 public static <T> List<T> parseCSVFileAsList(
     Map<String, String> columnMapping, File fileInputCsv) {
   Class<T> unknownType = null;
   return parseCSVFileAsList(unknownType, columnMapping, fileInputCsv);
 }

 /**
  * Parses a CSV file into a list of beans of the given class using the OpenCSV library.
  *
  * @param type the bean class handed to the mapping strategy; when {@code null} no type is set.
  * @param columnMapping maps CSV header names to bean field names, e.g. {@code "ID" -> "id"}.
  * @param fileInputCsv the CSV file to parse.
  * @param <T> the bean type.
  * @return the beans parsed from the CSV file, or an empty list if the file cannot be read.
  */
 public static <T> List<T> parseCSVFileAsList(
     Class<T> type, Map<String, String> columnMapping, File fileInputCsv) {
   HeaderColumnNameTranslateMappingStrategy<T> beanStrategy =
       new HeaderColumnNameTranslateMappingStrategy<>();
   if (type != null) {
     beanStrategy.setType(type);
   }
   beanStrategy.setColumnMapping(columnMapping);
   CsvToBean<T> csvToBean = new CsvToBean<>();
   // try-with-resources closes the reader (and the file handle) on success and on failure.
   // NOTE(review): FileReader decodes with the platform default charset - confirm that is intended.
   try (CSVReader reader = new CSVReader(new FileReader(fileInputCsv))) {
     return csvToBean.parse(beanStrategy, reader);
   } catch (IOException e) {
     logger.error(
         "Can't parse the CSV file:" + fileInputCsv.getAbsolutePath() + " -> " + e.getMessage(),
         e);
     return new ArrayList<>();
   }
 }