예제 #1
0
  /**
   * Opens an Avro writer on the file {@code <datasource directory>/<month>.avro}.
   *
   * <p>If the target file already exists it is opened for appending; otherwise it is created with
   * {@code AVRO_SCHEMA}. When opening fails part-way, the underlying HDFS output stream is closed
   * before the exception propagates, so no stream is left open on the target file.
   *
   * @param datasourceMonth the datasource and month identifying the target file
   * @return a writer ready to accept records; the caller is responsible for closing it
   * @throws IOException if the file system cannot be queried, or the file cannot be opened,
   *     created or read
   */
  private DataFileWriter<GenericRecord> openHDFSFile(DatasourceMonth datasourceMonth)
      throws IOException {
    DataFileWriter<GenericRecord> writer =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(AVRO_SCHEMA));
    writer.setSyncInterval(FILEWRITER_SYNC_INTERVAL);
    // No compression codec (e.g. CodecFactory.snappyCodec()) is configured for now.

    Path targetPath = buildTargetPath(datasourceMonth);
    // Only used in log messages.
    String fullTargetPath = this.hdfs.getWorkingDirectory() + "/" + targetPath;

    OutputStream outputStream = null;
    boolean opened = false;
    try {
      if (this.hdfs.exists(targetPath)) {
        // Appending to an existing file, based on
        // http://technicaltidbit.blogspot.com.es/2013/02/avro-can-append-in-hdfs-after-all.html
        if (debugMode) {
          this.hdfs.setReplication(targetPath, (short) 1);
        }
        LOGGER.info("Appending to existing file {}", fullTargetPath);
        outputStream = this.hdfs.append(targetPath);
        // The existing file is read so the writer can pick up its header. Depending on the Avro
        // version, appendTo may leave this input open, so it is closed explicitly here.
        FsInput existingContent = new FsInput(targetPath, this.hadoopConf);
        try {
          writer.appendTo(existingContent, outputStream);
        } finally {
          existingContent.close();
        }
      } else {
        // Creating a new file. The path is passed as an argument rather than concatenated into
        // the format string, so a path containing "{}" cannot shift the placeholders.
        LOGGER.info(
            "Creating new file {} for datasource {} and month {}",
            fullTargetPath,
            datasourceMonth.datasource(),
            datasourceMonth.month());
        outputStream = this.hdfs.create(targetPath);
        writer.create(AVRO_SCHEMA, outputStream);
      }
      opened = true;
      return writer;
    } finally {
      // On success the writer owns the stream and closes it from its own close(). On failure
      // nobody else holds a reference to it, so release it here.
      if (!opened && outputStream != null) {
        try {
          outputStream.close();
        } catch (IOException closeFailure) {
          // Logged instead of rethrown so the original failure is the one the caller sees.
          LOGGER.warn(
              "Could not close output stream for {} after a failed open",
              fullTargetPath,
              closeFailure);
        }
      }
    }
  }
예제 #2
0
 /**
  * Builds the path of the Avro file for the given datasource and month, as
  * {@code <datasource directory>/<month>.avro}.
  *
  * <p>The directory is looked up in {@code datasourcesDirectories} using the datasource as key.
  *
  * @param datasourceMonth the datasource and month identifying the target file
  * @return the path of the target file
  */
 private Path buildTargetPath(DatasourceMonth datasourceMonth) {
   String directoryPrefix = this.datasourcesDirectories.get(datasourceMonth.datasource()) + "/";
   String fileName = datasourceMonth.month() + ".avro";
   return new Path(directoryPrefix + fileName);
 }