private void writeContainer(Record src, OutputStream dst) {
   DataFileWriter<GenericContainer> dataFileWriter = null;
   try {
     try {
       Schema schema = null;
       for (Object attachment : src.get(Fields.ATTACHMENT_BODY)) {
         Preconditions.checkNotNull(attachment);
         GenericContainer datum = (GenericContainer) attachment;
         schema = getSchema(datum, schema);
         assert schema != null;
         if (dataFileWriter == null) { // init
            GenericDatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(schema);
            dataFileWriter = new DataFileWriter<>(datumWriter);
           if (codecFactory != null) {
             dataFileWriter.setCodec(codecFactory);
           }
           for (Map.Entry<String, String> entry : metadata.entrySet()) {
             dataFileWriter.setMeta(entry.getKey(), entry.getValue());
           }
           dataFileWriter.create(schema, dst);
         }
         dataFileWriter.append(datum);
       }
       if (dataFileWriter != null) {
         dataFileWriter.flush();
       }
     } catch (IOException e) {
       throw new MorphlineRuntimeException(e);
     }
   } finally {
     Closeables.closeQuietly(dataFileWriter);
   }
 }
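Not part of the original example, but for completeness: a container file produced by writeContainer() can be read back with Avro's DataFileReader, which picks the writer schema up from the file header. A minimal sketch (the helper name dumpContainer and the use of GenericRecord are illustrative):

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

  private static void dumpContainer(File avroFile) throws IOException {
    // No reader schema is passed; DataFileReader uses the writer schema stored in the container.
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(avroFile, new GenericDatumReader<GenericRecord>())) {
      for (GenericRecord record : reader) {
        System.out.println(record);
      }
    }
  }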
Example #2
  /**
   * Generates an input Avro file containing the given records in the temporary directory and
   * returns the full path of the file.
   */
  private <T> String generateTestFile(
      String filename,
      List<T> elems,
      SyncBehavior syncBehavior,
      int syncInterval,
      AvroCoder<T> coder,
      String codec)
      throws IOException {
    Random random = new Random(0);
    File tmpFile = tmpFolder.newFile(filename);
    String path = tmpFile.toString();

    FileOutputStream os = new FileOutputStream(tmpFile);
    DatumWriter<T> datumWriter = coder.createDatumWriter();
    try (DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) {
      writer.setCodec(CodecFactory.fromString(codec));
      writer.create(coder.getSchema(), os);

      int recordIndex = 0;
      int syncIndex = syncBehavior == SyncBehavior.SYNC_RANDOM ? random.nextInt(syncInterval) : 0;

      for (T elem : elems) {
        writer.append(elem);
        recordIndex++;

        switch (syncBehavior) {
          case SYNC_REGULAR:
            if (recordIndex == syncInterval) {
              recordIndex = 0;
              writer.sync();
            }
            break;
          case SYNC_RANDOM:
            if (recordIndex == syncIndex) {
              recordIndex = 0;
              writer.sync();
              syncIndex = random.nextInt(syncInterval);
            }
            break;
          case SYNC_DEFAULT:
          default:
        }
      }
    }
    return path;
  }
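A hypothetical call site, assuming it lives in the same test class (which supplies the tmpFolder rule and the SyncBehavior enum); the file name, record values, and the "null" codec are made-up examples:

    List<String> records = Arrays.asList("alpha", "beta", "gamma");
    // SYNC_DEFAULT never calls writer.sync() explicitly, so the sync interval of 0 is ignored.
    String path = generateTestFile(
        "strings.avro", records, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(String.class), "null");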
  @Override
  public void onTrigger(ProcessContext context, final ProcessSession session)
      throws ProcessException {
    FlowFile incomingAvro = session.get();
    if (incomingAvro == null) {
      return;
    }

    String inputSchemaProperty =
        context.getProperty(INPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema inputSchema;
    try {
      inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
      getLogger().error("Cannot find schema: " + inputSchemaProperty);
      session.transfer(incomingAvro, FAILURE);
      return;
    }
    String outputSchemaProperty =
        context.getProperty(OUTPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
    final Schema outputSchema;
    try {
      outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
    } catch (SchemaNotFoundException e) {
      getLogger().error("Cannot find schema: " + outputSchemaProperty);
      session.transfer(incomingAvro, FAILURE);
      return;
    }
    final Map<String, String> fieldMapping = new HashMap<>();
    for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
      if (entry.getKey().isDynamic()) {
        fieldMapping.put(entry.getKey().getName(), entry.getValue());
      }
    }
    final AvroRecordConverter converter =
        new AvroRecordConverter(inputSchema, outputSchema, fieldMapping);

    final DataFileWriter<Record> writer =
        new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    writer.setCodec(CodecFactory.snappyCodec());

    final DataFileWriter<Record> failureWriter =
        new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
    failureWriter.setCodec(CodecFactory.snappyCodec());

    try {
      final LongHolder written = new LongHolder(0L);
      final FailureTracker failures = new FailureTracker();

      final List<Record> badRecords = Lists.newLinkedList();
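      // Clone the incoming FlowFile so that records that fail conversion can be written
      // to a separate failure FlowFile below.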
      FlowFile incomingAvroCopy = session.clone(incomingAvro);
      FlowFile outgoingAvro =
          session.write(
              incomingAvro,
              new StreamCallback() {
                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                  try (DataFileStream<Record> stream =
                      new DataFileStream<Record>(
                          in, new GenericDatumReader<Record>(converter.getInputSchema()))) {
                    try (DataFileWriter<Record> w = writer.create(outputSchema, out)) {
                      for (Record record : stream) {
                        try {
                          Record converted = converter.convert(record);
                          w.append(converted);
                          written.incrementAndGet();
                        } catch (AvroConversionException e) {
                          failures.add(e);
                          getLogger().error("Error converting data: " + e.getMessage());
                          badRecords.add(record);
                        }
                      }
                    }
                  }
                }
              });

      FlowFile badOutput =
          session.write(
              incomingAvroCopy,
              new StreamCallback() {
                @Override
                public void process(InputStream in, OutputStream out) throws IOException {

                  try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
                    for (Record record : badRecords) {
                      w.append(record);
                    }
                  }
                }
              });

      long errors = failures.count();

      // with immediate=false, the counters update only if the session commit (file transfer) succeeds
      session.adjustCounter("Converted records", written.get(), false);
      session.adjustCounter("Conversion errors", errors, false);

      if (written.get() > 0L) {
        session.transfer(outgoingAvro, SUCCESS);
      } else {
        session.remove(outgoingAvro);

        if (errors == 0L) {
          badOutput = session.putAttribute(badOutput, "errors", "No incoming records");
          session.transfer(badOutput, FAILURE);
        }
      }

      if (errors > 0L) {
        getLogger()
            .warn(
                "Failed to convert {}/{} records between Avro Schemas",
                new Object[] {errors, errors + written.get()});
        badOutput = session.putAttribute(badOutput, "errors", failures.summary());
        session.transfer(badOutput, FAILURE);
      } else if (written.get() > 0L) {
        // No conversion errors and records were written, so the failure output is unused.
        // When nothing was written at all, badOutput has already been transferred above.
        session.remove(badOutput);
      }
    } catch (ProcessException | DatasetIOException e) {
      getLogger().error("Failed reading or writing", e);
      session.transfer(incomingAvro, FAILURE);
    } catch (DatasetException e) {
      getLogger().error("Failed to read FlowFile", e);
      session.transfer(incomingAvro, FAILURE);
    }
  }
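For reference, the conversion step above can be exercised on its own; this sketch assumes the same Kite AvroRecordConverter used in onTrigger(), with a made-up field mapping ("old_name" to "new_name") standing in for the processor's dynamic properties:

    Map<String, String> fieldMapping = new HashMap<>();
    fieldMapping.put("old_name", "new_name"); // each dynamic property contributes one entry like this

    AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, fieldMapping);
    try {
      Record converted = converter.convert(inputRecord); // inputRecord: a Record matching inputSchema
      // use converted ...
    } catch (AvroConversionException e) {
      // records that cannot be mapped onto the output schema go to the failure output in the processor
    }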