Esempio n. 1
0
  /**
   * Opens an Avro writer on the HDFS file that holds the records of the given datasource and
   * month.
   *
   * <p>The target path is obtained from {@code buildTargetPath} (described by the original author
   * as {@code <datasource directory>/<month>.avro}). If the file already exists it is opened for
   * appending; otherwise it is created and the Avro header is written.
   *
   * <p>If opening fails after the HDFS output stream has been obtained, the stream is closed
   * before the exception propagates, so the file is not left open for writing.
   *
   * @param datasourceMonth datasource/month pair identifying the target file
   * @return a writer that is open and ready to accept records; the caller must close it
   * @throws IOException if the file cannot be checked, opened, created or initialised
   */
  private DataFileWriter<GenericRecord> openHDFSFile(DatasourceMonth datasourceMonth)
      throws IOException {
    DataFileWriter<GenericRecord> writer =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(AVRO_SCHEMA));
    writer.setSyncInterval(FILEWRITER_SYNC_INTERVAL);
    // writer.setCodec(CodecFactory.snappyCodec()); // omit for now

    Path targetPath = buildTargetPath(datasourceMonth);
    // just for logging
    String fullTargetPath = this.hdfs.getWorkingDirectory() + "/" + targetPath;

    OutputStream outputStream = null;
    boolean opened = false;
    try {
      // Append to an existing file, or create a new file otherwise
      if (this.hdfs.exists(targetPath)) {
        // appending to an existing file
        // based on
        // http://technicaltidbit.blogspot.com.es/2013/02/avro-can-append-in-hdfs-after-all.html
        if (debugMode) {
          this.hdfs.setReplication(targetPath, (short) 1);
        }
        LOGGER.info("Appending to existing file {}", fullTargetPath);
        outputStream = this.hdfs.append(targetPath);
        // The input is only needed while appendTo reads the existing header (schema, sync
        // marker, codec). Recent Avro versions do not close it, so close it here; on older
        // versions that already closed it this is expected to be a harmless second close.
        FsInput existingFile = new FsInput(targetPath, this.hadoopConf);
        try {
          writer.appendTo(existingFile, outputStream);
        } finally {
          existingFile.close();
        }
      } else {
        // creating a new file
        LOGGER.info(
            "Creating new file {} for datasource {} and month {}",
            fullTargetPath,
            datasourceMonth.datasource(),
            datasourceMonth.month());
        outputStream = this.hdfs.create(targetPath);
        writer.create(AVRO_SCHEMA, outputStream);
      }
      opened = true;
      return writer;
    } finally {
      // On failure nobody else owns the stream yet, so release it here. On success the stream
      // belongs to the returned writer and is closed when the writer is closed.
      if (!opened && outputStream != null) {
        try {
          outputStream.close();
        } catch (IOException closeFailure) {
          // Do not mask the original failure; just record that cleanup failed too.
          LOGGER.warn(
              "Could not close output stream for {} after a failed open",
              fullTargetPath,
              closeFailure);
        }
      }
    }
  }
 /**
  * Loads the Avro schema from the {@code /document.avsc} classpath resource and opens the Avro
  * data file named by the {@code DOCUMENT_PATH} configuration entry, appending to it if it
  * already exists and creating it otherwise.
  *
  * @param conf configuration map; must contain a {@code String} under {@code DOCUMENT_PATH}
  * @param context operation context (not used here)
  * @throws IllegalArgumentException if {@code DOCUMENT_PATH} is not configured
  * @throws IllegalStateException if the schema resource is not on the classpath
  * @throws RuntimeException wrapping any {@link IOException} raised while reading the schema or
  *     opening the data file
  */
 @Override
 public void prepare(Map conf, TridentOperationContext context) {
   String path = (String) conf.get("DOCUMENT_PATH");
   if (path == null) {
     throw new IllegalArgumentException("Missing required configuration entry DOCUMENT_PATH");
   }
   try {
     java.io.InputStream schemaStream =
         PersistDocumentFunction.class.getResourceAsStream("/document.avsc");
     if (schemaStream == null) {
       throw new IllegalStateException(
           "Avro schema resource /document.avsc not found on the classpath");
     }
     try {
       // Schema.parse(InputStream) is deprecated; Schema.Parser is the supported replacement.
       schema = new Schema.Parser().parse(schemaStream);
     } finally {
       // The parser does not own the stream, so release it here.
       schemaStream.close();
     }

     File file = new File(path);
     DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
     dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
     if (file.exists()) {
       dataFileWriter.appendTo(file);
     } else {
       dataFileWriter.create(schema, file);
     }
   } catch (IOException e) {
     throw new RuntimeException("Failed to open Avro document file " + path, e);
   }
 }