/** * Builds the target file path as <datasource directory>/<month>.avro. If the target file already * exists, then it is open for appending, otherwise it is created */ private DataFileWriter<GenericRecord> openHDFSFile(DatasourceMonth datasourceMonth) throws IOException { DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(AVRO_SCHEMA)); writer.setSyncInterval(FILEWRITER_SYNC_INTERVAL); // writer.setCodec(CodecFactory.snappyCodec()); // omit for now Path targetPath = buildTargetPath(datasourceMonth); // just for logging String fullTargetPath = this.hdfs.getWorkingDirectory() + "/" + targetPath; // Append to an existing file, or create a new file is file otherwise if (this.hdfs.exists(targetPath)) { // appending to an existing file // based on // http://technicaltidbit.blogspot.com.es/2013/02/avro-can-append-in-hdfs-after-all.html if (debugMode) { this.hdfs.setReplication(targetPath, (short) 1); } LOGGER.info("Appending to existing file {}", fullTargetPath); OutputStream outputStream = this.hdfs.append(targetPath); writer.appendTo(new FsInput(targetPath, this.hadoopConf), outputStream); } else { // creating a new file LOGGER.info( "Creating new file " + fullTargetPath + " for datasource {} and month {}", datasourceMonth.datasource(), datasourceMonth.month()); OutputStream outputStream = this.hdfs.create(targetPath); writer.create(AVRO_SCHEMA, outputStream); } return writer; }
/**
 * Prepares this function: loads the Avro schema from the classpath resource
 * {@code /document.avsc} and opens an Avro file writer on the path configured under the
 * {@code "DOCUMENT_PATH"} key, appending if that file already exists and creating it otherwise.
 *
 * <p>The raw {@code Map} parameter is dictated by the Trident {@code prepare} contract.
 *
 * @param conf topology configuration; must contain a String entry under "DOCUMENT_PATH"
 * @param context Trident operation context (unused here)
 * @throws RuntimeException wrapping any {@link IOException} raised while opening the file
 */
@Override
public void prepare(Map conf, TridentOperationContext context) {
  try {
    String path = (String) conf.get("DOCUMENT_PATH");
    // Schema.parse(InputStream) is deprecated; Schema.Parser is the supported API.
    schema =
        new Schema.Parser()
            .parse(PersistDocumentFunction.class.getResourceAsStream("/document.avsc"));
    File file = new File(path);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    if (file.exists()) {
      dataFileWriter.appendTo(file);
    } else {
      dataFileWriter.create(schema, file);
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}