Example #1
0
/**
 * REST resource that executes a CQL search-retrieve request against the service named by the
 * {@code service} query parameter and streams the stylesheet-transformed response as XHTML.
 */
public class SearchService {

  private static final Logger logger = LoggerFactory.getLogger(SearchService.class.getName());

  @Context ServletConfig servletConfig;

  /**
   * Builds a streaming response for a CQL query.
   *
   * <p>The search itself is deferred: it runs when the returned {@link StreamingOutput} is
   * written.
   *
   * @param query the CQL query string (query parameter {@code q})
   * @param from the value passed to {@code setFrom} on the request (query parameter {@code from})
   * @param size the value passed to {@code setSize} on the request (query parameter {@code size})
   * @param service the name handed to {@code ZClientFactory.newZClient}
   * @return an output that performs the search and writes the XHTML result when invoked
   * @throws Exception declared for callers; this method itself only constructs the output object
   */
  @POST
  @Produces({"application/xhtml+xml; charset=UTF-8"})
  public StreamingOutput postXHTML(
      @QueryParam("q") final String query,
      @QueryParam("from") final int from,
      @QueryParam("size") final int size,
      @QueryParam("service") final String service)
      throws Exception {
    return new StreamingOutput() {
      @Override
      public void write(OutputStream output) throws IOException, WebApplicationException {
        try {
          ZClient client = ZClientFactory.newZClient(service);
          try {
            ZSearchRetrieveRequest request =
                client.newCQLSearchRetrieveRequest().setQuery(query).setFrom(from).setSize(size);
            ZSearchRetrieveResponse response = request.execute();
            StylesheetTransformer transformer = new StylesheetTransformer("xsl");
            response
                .setStylesheetTransformer(transformer)
                .setOutputFormat(OutputFormat.XHTML)
                .to(new OutputStreamWriter(output, "UTF-8"));
          } finally {
            // always release the client, also when the request, transformation or write fails
            client.close();
          }
        } catch (Diagnostics d) {
          // diagnostics are reported to the caller as an I/O failure of the stream
          logger.error(d.getMessage(), d);
          throw new IOException(d);
        } catch (IOException e) {
          logger.error(e.getMessage(), e);
          throw new IOException(e);
        }
      }
    };
  }
}
Example #2
0
/**
 * Parses the {@code zdb-oai-marc.xml} test resource with {@code MarcXmlReader} and logs every
 * listener event at debug level.
 */
public class MarcXmlReaderTest {

  private static final Logger logger = LoggerFactory.getLogger(MarcXmlReaderTest.class.getName());

  /**
   * Reads the test resource and feeds it through the reader with a logging listener.
   *
   * @throws Exception if the resource is missing or parsing fails
   */
  @Test
  public void testMarcXMLFromOAI() throws Exception {
    // try-with-resources makes sure the resource stream is closed even if parsing fails
    try (InputStream in = getClass().getResourceAsStream("zdb-oai-marc.xml")) {
      if (in == null) {
        throw new IOException("input stream not found");
      }
      InputSource source = new InputSource(new InputStreamReader(in, "UTF-8"));
      MarcXmlReader reader = new MarcXmlReader(source);
      reader.setListener(
          new MarcXchangeListener() {

            @Override
            public void leader(String label) {
              logger.debug("leader=" + label);
            }

            @Override
            public void beginRecord(String format, String type) {
              logger.debug("beginRecord format=" + format + " type=" + type);
            }

            @Override
            public void beginControlField(Field field) {
              logger.debug("beginControlField field=" + field);
            }

            @Override
            public void endControlField(Field field) {
              logger.debug("endControlField field=" + field);
            }

            @Override
            public void beginDataField(Field field) {
              logger.debug("beginDataField field=" + field);
            }

            @Override
            public void endDataField(Field field) {
              logger.debug("endDataField field=" + field);
            }

            @Override
            public void beginSubField(Field field) {
              logger.debug("beginSubField field=" + field);
            }

            @Override
            public void endSubField(Field field) {
              // label spelled like the other events so log output can be searched consistently
              logger.debug("endSubField field=" + field);
            }

            @Override
            public void endRecord() {
              logger.debug("endRecord");
            }

            @Override
            public void trailer(String trailer) {
              logger.debug("trailer " + trailer);
            }
          });
      reader.parse();
    }
  }
}
Example #3
0
/** Push Springer citations to Elasticsearch */
public class SpringerCitations extends AbstractImporter<Long, AtomicLong> {

  private static final Logger logger = LoggerFactory.getLogger(SpringerCitations.class.getName());

  // Platform line separator, used when assembling the command-line help text.
  private static final String lf = System.getProperty("line.separator");

  // Input file URIs, assigned in main(). Static, so every importer instance polls the same queue.
  private static Queue<URI> input;

  // Counter returned by next(); static, so it is shared by all importer instances.
  private static final AtomicLong fileCounter = new AtomicLong(0L);

  // Context in which push() builds the resource for one citation file.
  private final SimpleResourceContext resourceContext = new SimpleResourceContext();

  // Value of the --index option; also used as the host part of the final resource IRI in push().
  private static String index;

  // Value of the --type option; also used as the query part of the final resource IRI in push().
  private static String type;

  // Destination that receives the resource context at the end of push().
  private ElementOutput out;

  // Set once the input queue is found empty or a push failed; hasNext() returns !done.
  private boolean done = false;

  /**
   * Command-line entry point: collects the input files, creates the Elasticsearch index and runs
   * the import with the requested number of threads.
   *
   * <p>The JVM is always terminated via {@link System#exit(int)}: with status 1 after printing
   * help or on an I/O, interruption or execution failure, otherwise with status 0.
   *
   * @param args command-line options, see the help text printed for {@code --help}
   */
  public static void main(String[] args) {
    int exitcode = 0;
    try {
      OptionParser parser =
          new OptionParser() {
            {
              accepts("elasticsearch").withRequiredArg().ofType(String.class).required();
              accepts("index").withRequiredArg().ofType(String.class).required();
              accepts("type").withRequiredArg().ofType(String.class).required();
              accepts("maxbulkactions").withRequiredArg().ofType(Integer.class).defaultsTo(1000);
              accepts("maxconcurrentbulkrequests")
                  .withRequiredArg()
                  .ofType(Integer.class)
                  .defaultsTo(4 * Runtime.getRuntime().availableProcessors());
              accepts("mock").withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
              accepts("path").withRequiredArg().ofType(String.class).required();
              // not marked required: otherwise the documented default could never take effect
              accepts("pattern").withRequiredArg().ofType(String.class).defaultsTo("*.txt");
              accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(1);
              accepts("help");
            }
          };
      final OptionSet options = parser.parse(args);
      // "help" takes no argument, so presence must be tested with has(), not hasArgument()
      if (options.has("help")) {
        System.err.println(
            "Help for "
                + SpringerCitations.class.getCanonicalName()
                + lf
                + " --help                 print this help message"
                + lf
                + " --elasticsearch <uri>  Elasticsearch URI"
                + lf
                + " --index <index>        Elasticsearch index name"
                + lf
                + " --type <type>          Elasticsearch type name"
                + lf
                + " --maxbulkactions <n>   the number of bulk actions per request (optional, default: 1000)"
                + lf
                + " --maxconcurrentbulkrequests <n> the number of concurrent bulk requests (optional, default: 4 * cpu cores)"
                + lf
                + " --path <path>          a file path from where the input files are recursively collected (required)"
                + lf
                + " --pattern <pattern>    a regex for selecting matching file names for input (default: *.txt)"
                + lf
                + " --threads <n>          the number of threads (optional, default: 1)");
        System.exit(1);
      }
      input =
          new Finder((String) options.valueOf("pattern"))
              .find((String) options.valueOf("path"))
              .getURIs();
      final Integer threads = (Integer) options.valueOf("threads");

      logger.info("found {} input files", input.size());

      URI esURI = URI.create((String) options.valueOf("elasticsearch"));
      index = (String) options.valueOf("index");
      type = (String) options.valueOf("type");
      int maxbulkactions = (Integer) options.valueOf("maxbulkactions");
      int maxconcurrentbulkrequests = (Integer) options.valueOf("maxconcurrentbulkrequests");
      boolean mock = (Boolean) options.valueOf("mock");

      final IngestClient es = mock ? new MockIngestClient() : new IngestClient();

      es.maxBulkActions(maxbulkactions)
          .maxConcurrentBulkRequests(maxconcurrentbulkrequests)
          .newClient(esURI)
          .waitForCluster(ClusterHealthStatus.YELLOW, TimeValue.timeValueSeconds(30));

      logger.info("creating new index ...");
      es.setIndex(index).setType(type).newIndex();
      logger.info("... new index created");

      final ResourceSink sink = new ResourceSink(es);

      // every worker gets its own importer; all of them share the sink and the static input queue
      ImportService service =
          new ImportService()
              .threads(threads)
              .factory(
                  new ImporterFactory() {
                    @Override
                    public Importer newImporter() {
                      return new SpringerCitations(sink);
                    }
                  })
              .execute();

      logger.info(
          "finished, number of files = {}, resources indexed = {}", fileCounter, sink.getCounter());

      service.shutdown();
      logger.info("service shutdown");

      es.shutdown();
      logger.info("elasticsearch client shutdown");

    } catch (IOException | InterruptedException | ExecutionException e) {
      logger.error(e.getMessage(), e);
      exitcode = 1;
    }
    System.exit(exitcode);
  }

  /**
   * Creates an importer that hands every resource it builds to the given output.
   *
   * @param out the output that receives the resources
   */
  public SpringerCitations(ElementOutput out) {
    super();
    this.out = out;
  }

  /**
   * Intentionally does nothing: the input queue is left untouched when an importer is closed.
   *
   * @throws IOException never thrown by this implementation
   */
  @Override
  public void close() throws IOException {
    // no-op, the input queue must not be cleared here
  }

  /**
   * Tells whether this importer may still have work.
   *
   * @return {@code false} once the input queue has been seen empty or the importer was marked
   *     done, {@code true} otherwise
   */
  @Override
  public boolean hasNext() {
    // latch: once done is set it stays set, regardless of the queue state
    done = done || input.isEmpty();
    return !done;
  }

  /**
   * Takes the next input file from the queue and pushes its citation.
   *
   * <p>The file counter is only advanced for a file that was pushed without error. An empty queue
   * or a failing push marks this importer as done.
   *
   * @return the file counter
   */
  @Override
  public AtomicLong next() {
    if (done) {
      return fileCounter;
    }
    try {
      URI uri = input.poll();
      if (uri == null) {
        // queue drained by another importer in the meantime; nothing was processed
        done = true;
      } else {
        push(uri);
        fileCounter.incrementAndGet();
      }
    } catch (Exception e) {
      logger.error(e.getMessage(), e);
      done = true;
    }
    return fileCounter;
  }

  // Vocabulary terms used by push(). They never change, so they are created once per class
  // instead of once per importer instance.

  private static final IRI FABIO_ARTICLE = IRI.create("fabio:Article");

  private static final IRI FABIO_JOURNAL = IRI.create("fabio:Journal");

  private static final IRI FABIO_PERIODICAL_VOLUME = IRI.create("fabio:PeriodicalVolume");

  private static final IRI FABIO_PERIODICAL_ISSUE = IRI.create("fabio:PeriodicalIssue");

  private static final IRI FABIO_PRINT_OBJECT = IRI.create("fabio:PrintObject");

  private static final IRI FRBR_PARTOF = IRI.create("frbr:partOf");

  private static final IRI FRBR_EMBODIMENT = IRI.create("frbr:embodiment");

  // Opens the input streams for the file URIs handled by push().
  private static final TextFileConnectionFactory factory = new TextFileConnectionFactory();

  /**
   * Reads one citation file, builds a resource from its tagged lines and sends it to the output.
   *
   * <p>Only lines of the form {@code %<tag>} followed by a value starting at column 3 are
   * evaluated; empty lines, lines not starting with {@code %} and lines too short to carry a tag
   * are skipped. A tag without a value yields an empty string.
   *
   * @param uri the location of the citation file; {@code null} is ignored
   * @throws Exception if the file cannot be opened or read, or if building or writing the resource
   *     fails
   */
  private void push(URI uri) throws Exception {
    if (uri == null) {
      return;
    }
    InputStream in = factory.open(uri);
    if (in == null) {
      throw new IOException("unable to open " + uri);
    }
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
      String title = null;
      List<String> author = new LinkedList<>();
      String year = null;
      String journal = null;
      String issn = null;
      String volume = null;
      String issue = null;
      String pagination = null;
      String doi = null;
      String publisher = null;
      String line;
      while ((line = reader.readLine()) != null) {
        // a line needs at least "%" plus a tag character to be usable
        if (line.length() < 2 || '%' != line.charAt(0)) {
          continue;
        }
        // guard against lines that end right after the tag, substring(3) would throw on them
        String value = line.length() > 3 ? line.substring(3).trim() : "";
        switch (line.charAt(1)) {
          case 'D':
            year = value;
            break;
          case 'T':
            title = value;
            break;
          case '@':
            issn = value;
            break;
          case 'J':
            journal = value;
            break;
          case 'A':
            author.add(value);
            break;
          case 'V':
            volume = value;
            break;
          case 'N':
            issue = value;
            break;
          case 'P':
            pagination = value;
            break;
          case 'R':
            doi = value;
            break;
          case 'I':
            publisher = value;
            break;
          case 'U': // URL (DOI resolver)
          case 'K': // keywords
          case '0': // record type
          case '8': // day
          case 'G': // language
            // known tags that are deliberately not indexed
            break;
          default:
            logger.warn("unknown tag: " + line);
            break;
        }
      }
      // create bibliographic key from first author and title, if there is an author

      String key =
          author.isEmpty()
              ? null
              : new WorkAuthor().authorName(author.get(0)).workName(title).createIdentifier();

      IRI dereferencable =
          IRI.builder().scheme("http").host("xbib.info").path("/doi/").fragment(doi).build();

      Resource r =
          resourceContext
              .newResource()
              .id(dereferencable)
              .a(FABIO_ARTICLE)
              .add("xbib:key", key)
              .add("prism:doi", doi)
              .add("dc:title", title);
      for (String a : author) {
        r.add("dc:creator", a);
      }
      r.add("prism:publicationDate", new SimpleLiteral<>(year).type(Literal.GYEAR));
      r.newResource(FRBR_EMBODIMENT).a(FABIO_PERIODICAL_VOLUME).add("prism:volume", volume);
      r.newResource(FRBR_EMBODIMENT).a(FABIO_PERIODICAL_ISSUE).add("prism:number", issue);
      r.newResource(FRBR_EMBODIMENT).a(FABIO_PRINT_OBJECT).add("prism:pageRange", pagination);
      r.newResource(FRBR_PARTOF)
          .a(FABIO_JOURNAL)
          .add("prism:publicationName", journal)
          .add("prism:issn", issn)
          .add("dc:publisher", publisher);
      // replace the id by one built from index (host), type (query) and the original fragment
      resourceContext
          .resource()
          .id(
              IRI.builder()
                  .scheme("http")
                  .host(index)
                  .query(type)
                  .fragment(resourceContext.resource().id().getFragment())
                  .build());
      out.output(resourceContext, resourceContext.contentBuilder());
    }
  }
}
/** A Sax adapter for MarcXchange */
public class MarcXchangeSaxAdapter implements MarcXchangeConstants, MarcXchangeListener {

  private static final Logger logger =
      LoggerFactory.getLogger(MarcXchangeSaxAdapter.class.getName());

  // Shared attribute list for elements that carry no attributes (used for the leader element).
  private static final AttributesImpl EMPTY_ATTRIBUTES = new AttributesImpl();

  // Factory that creates the character stream in inputSource().
  private static final CharStreamFactory factory = CharStreamFactory.getInstance();

  // Receives the chunks and separator marks of the stream and turns them into MarcXchange events.
  private final CharStreamListener streamListener = new Iso2709StreamListener();

  // The stream read by parse(); assigned in inputSource().
  private CharStream stream;

  // Separator most recently signalled by the stream listener; '\u0000' means start of stream.
  private char mark = '\u0000';

  // Running position maintained by the stream listener, used as key into the field directory.
  private int position = 0;

  // Field directory of the current record, built by the stream listener.
  private FieldDirectory directory;

  // The field currently being processed by the stream listener.
  private Field designator;

  // Record label of the current record, built by the stream listener.
  private RecordLabel label;

  // True between a successful beginDataField() and the matching endDataField().
  private boolean datafieldOpen;

  // Maintained by the stream listener while subfields of a data field are being read.
  private boolean subfieldOpen;

  // True between a successful beginRecord() and the matching endRecord().
  private boolean recordOpen;

  // Schema name; the value "MARC21" (case-insensitive) switches namespace and attribute handling.
  private String schema;

  // Format and type passed to beginRecord() by the stream listener.
  private String format;

  private String type;

  // Value of the most recent non-empty control field 001, see endControlField().
  private String id;

  // Namespace URI used for the emitted elements; chosen in beginCollection().
  private String nsUri;

  // Optional SAX handler that receives the XML events.
  private ContentHandler contentHandler;

  // Optional listener that receives the MarcXchange events in addition to the SAX handler.
  private MarcXchangeListener listener;

  // If true, exceptions in event methods are rethrown as RuntimeException.
  private boolean fatalerrors = false;

  // If true (and fatalerrors is false), exceptions in event methods are not even logged.
  private boolean silenterrors = false;

  // Buffer size passed to the character stream factory.
  private int buffersize = 8192;

  /** Creates an adapter with no open record or subfield and {@code NS_URI} as namespace. */
  public MarcXchangeSaxAdapter() {
    this.recordOpen = false;
    this.subfieldOpen = false;
    this.nsUri = NS_URI;
  }

  /**
   * Sets the buffer size that is passed to the stream factory by a later
   * {@code inputSource(...)} call.
   *
   * @param buffersize the buffer size
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter buffersize(int buffersize) {
    this.buffersize = buffersize;
    return this;
  }

  /**
   * Creates the character stream to parse from the given input source.
   *
   * <p>A byte stream takes precedence over a character stream. Bytes are decoded with the
   * encoding of the source, or with {@code "ANSEL"} if the source declares none.
   *
   * @param source the input source
   * @return this adapter, for call chaining
   * @throws IOException if the reader for the byte stream cannot be created
   */
  public MarcXchangeSaxAdapter inputSource(final InputSource source) throws IOException {
    final Reader input;
    if (source.getByteStream() == null) {
      input = source.getCharacterStream();
    } else {
      String charset = source.getEncoding();
      if (charset == null) {
        charset = "ANSEL";
      }
      input = new InputStreamReader(source.getByteStream(), charset);
    }
    this.stream = factory.newStream(input, buffersize, streamListener);
    return this;
  }

  /**
   * Sets the SAX handler that receives the XML events.
   *
   * @param handler the content handler, may be {@code null} to emit no SAX events
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setContentHandler(ContentHandler handler) {
    contentHandler = handler;
    return this;
  }

  /**
   * Sets the listener that is notified of the MarcXchange events.
   *
   * @param listener the listener, may be {@code null}
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setListener(MarcXchangeListener listener) {
    this.listener = listener;
    return this;
  }

  /**
   * Sets the schema name. The value {@code "MARC21"} (compared case-insensitively) selects the
   * MARC 21 namespace and attribute handling; any other value selects the default.
   *
   * @param schema the schema name
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setSchema(String schema) {
    this.schema = schema;
    return this;
  }

  /**
   * Sets the format that is reported when a record begins.
   *
   * @param format the record format
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setFormat(String format) {
    this.format = format;
    return this;
  }

  /**
   * Sets the type that is reported when a record begins.
   *
   * @param type the record type
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setType(String type) {
    this.type = type;
    return this;
  }

  /**
   * Controls whether exceptions raised while emitting events are rethrown as
   * {@link RuntimeException}.
   *
   * @param fatalerrors {@code true} to rethrow; must not be {@code null}
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setFatalErrors(Boolean fatalerrors) {
    this.fatalerrors = fatalerrors.booleanValue();
    return this;
  }

  /**
   * Controls whether non-fatal exceptions raised while emitting events are logged.
   *
   * @param silenterrors {@code true} to suppress the warning log; must not be {@code null}
   * @return this adapter, for call chaining
   */
  public MarcXchangeSaxAdapter setSilentErrors(Boolean silenterrors) {
    this.silenterrors = silenterrors.booleanValue();
    return this;
  }

  /**
   * Returns the identifier taken from the most recent non-empty control field {@code 001}.
   *
   * @return the identifier, or {@code null} if none has been seen yet
   */
  public String getIdentifier() {
    return this.id;
  }

  /**
   * Parses the ISO 2709 stream and emits SAX events.
   *
   * <p>The stream is read until it reports no more data; the events themselves are produced by the
   * stream listener while reading. The stream is closed in every case, also when reading fails.
   *
   * @throws IOException if no input source has been set or reading the stream fails
   * @throws SAXException if the content handler rejects the collection start or end
   */
  public void parse() throws IOException, SAXException {
    if (stream == null) {
      // fail early and explicitly instead of with a NullPointerException after startDocument
      throw new IOException("no input source set");
    }
    beginCollection();
    try {
      while (stream.readData() != null) {
        // nothing to do here, the stream listener emits the events
      }
    } finally {
      stream.close();
    }
    endCollection();
  }

  /**
   * Starts the document and opens the collection root element.
   *
   * <p>The namespace and the schema location written to the root element depend on whether the
   * schema is {@code "MARC21"} (case-insensitive). Without a content handler only a warning is
   * logged.
   *
   * @throws SAXException if the content handler rejects an event
   */
  public void beginCollection() throws SAXException {
    if (contentHandler == null) {
      logger.warn("no content handler set");
      return;
    }
    contentHandler.startDocument();
    // pick namespace and schema location for the selected schema
    final boolean marc21 = "MARC21".equalsIgnoreCase(schema);
    this.nsUri = marc21 ? MARC21_NS_URI : NS_URI;
    final String schemaLocation =
        marc21 ? MARC21_NS_URI + " " + MARC21_SCHEMA : NS_URI + " " + MARCXCHANGE_SCHEMA;
    // write schema info: xsi namespace declaration plus xsi:schemaLocation
    AttributesImpl rootAttributes = new AttributesImpl();
    rootAttributes.addAttribute(
        XMLNS.NS_URI, XSI.NS_PREFIX, XMLNS.NS_PREFIX + ":" + XSI.NS_PREFIX, "CDATA", XSI.NS_URI);
    rootAttributes.addAttribute(
        XSI.NS_URI, "schemaLocation", XSI.NS_PREFIX + ":schemaLocation", "CDATA", schemaLocation);
    contentHandler.startPrefixMapping("", nsUri);
    contentHandler.startElement(nsUri, COLLECTION, COLLECTION, rootAttributes);
  }

  /**
   * Closes the collection root element and ends the document.
   *
   * <p>Also ends the default-namespace prefix mapping that is started together with the root
   * element, so that prefix mapping events are properly paired. Without a content handler only a
   * warning is logged.
   *
   * @throws SAXException if the content handler rejects an event
   */
  public void endCollection() throws SAXException {
    if (contentHandler == null) {
      logger.warn("no content handler set");
      return;
    }
    contentHandler.endElement(nsUri, COLLECTION, COLLECTION);
    // counterpart of startPrefixMapping("", nsUri); SAX expects it right after the end element
    contentHandler.endPrefixMapping("");
    contentHandler.endDocument();
  }

  /**
   * Opens a record element and notifies the listener, unless a record is already open.
   *
   * <p>The format attribute is omitted for the {@code "MARC21"} schema. Exceptions are rethrown as
   * {@link RuntimeException} if fatal errors are enabled, otherwise logged unless silenced.
   *
   * @param format the record format, may be {@code null}
   * @param type the record type, may be {@code null}
   */
  @Override
  public void beginRecord(String format, String type) {
    if (recordOpen) {
      return;
    }
    try {
      final boolean marc21 = "MARC21".equalsIgnoreCase(schema);
      AttributesImpl recordAttributes = new AttributesImpl();
      if (!marc21 && format != null) {
        recordAttributes.addAttribute(nsUri, FORMAT, FORMAT, "CDATA", format);
      }
      if (type != null) {
        recordAttributes.addAttribute(nsUri, TYPE, TYPE, "CDATA", type);
      }
      if (contentHandler != null) {
        contentHandler.startElement(nsUri, RECORD, RECORD, recordAttributes);
      }
      if (listener != null) {
        listener.beginRecord(format, type);
      }
      recordOpen = true;
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      }
      if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Closes the open record: notifies the listener, ends the record element and finally sends the
   * listener a trailer event. Does nothing if no record is open.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   */
  @Override
  public void endRecord() {
    if (!recordOpen) {
      return;
    }
    try {
      final boolean hasListener = listener != null;
      if (hasListener) {
        listener.endRecord();
      }
      if (contentHandler != null) {
        contentHandler.endElement(nsUri, RECORD, RECORD);
      }
      if (hasListener) {
        // emit trailer event, drives record output segmentation
        listener.trailer(null);
      }
      recordOpen = false;
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      }
      if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Writes the leader element with the given text and notifies the listener.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   *
   * @param value the leader text; {@code null} is ignored
   */
  @Override
  public void leader(String value) {
    if (value == null) {
      return;
    }
    try {
      if (contentHandler != null) {
        final char[] text = value.toCharArray();
        contentHandler.startElement(nsUri, LEADER, LEADER, EMPTY_ATTRIBUTES);
        contentHandler.characters(text, 0, text.length);
        contentHandler.endElement(nsUri, LEADER, LEADER);
      }
      if (listener != null) {
        listener.leader(value);
      }
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      }
      if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Ignored by this adapter.
   *
   * @param trailer the trailer, not used
   */
  @Override
  public void trailer(String trailer) {
    // do nothing, MARC reading defines no trailer
  }

  /**
   * Opens a control field element carrying the tag of the given field and notifies the listener.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   *
   * @param designator the control field; {@code null} is ignored
   */
  @Override
  public void beginControlField(Field designator) {
    if (designator == null) {
      return;
    }
    try {
      AttributesImpl fieldAttributes = new AttributesImpl();
      fieldAttributes.addAttribute(nsUri, TAG, TAG, "CDATA", designator.tag());
      if (contentHandler != null) {
        contentHandler.startElement(nsUri, CONTROLFIELD, CONTROLFIELD, fieldAttributes);
      }
      if (listener != null) {
        listener.beginControlField(designator);
      }
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      }
      if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Notifies the listener, writes the data of the control field as character content and closes
   * the control field element.
   *
   * <p>A non-empty value of field {@code 001} is remembered as the record identifier. In fields
   * {@code 006}, {@code 007} and {@code 008} the character {@code '^'} is replaced by
   * {@code '|'} before writing. A field without data or tag is closed without content, so the
   * element stays balanced.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   *
   * @param designator the control field, may be {@code null}
   */
  @Override
  public void endControlField(Field designator) {
    try {
      if (listener != null) {
        listener.endControlField(designator);
      }
      if (designator != null) {
        String value = designator.data();
        // null-safe like endDataField: a missing value must not prevent the end element below
        if (value != null && !value.isEmpty()) {
          String tag = designator.tag();
          if (tag != null) {
            switch (tag) {
              case "001":
                this.id = value;
                break;
              case "006":
              case "007":
              case "008":
                // fix fill characters here
                value = value.replace('^', '|');
                break;
              default:
                break;
            }
          }
          if (contentHandler != null) {
            contentHandler.characters(value.toCharArray(), 0, value.length());
          }
        }
      }
      if (contentHandler != null) {
        contentHandler.endElement(nsUri, CONTROLFIELD, CONTROLFIELD);
      }
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      } else if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Opens a data field element with tag and indicator attributes and notifies the listener.
   *
   * <p>A field that reports itself as control field is written completely as control field
   * instead. Nothing happens if a data field is already open. An empty tag is replaced by
   * {@code Field.NULL_TAG}. Every character of the indicator string becomes one
   * {@code ind<n>} attribute; for the {@code "MARC21"} schema, missing indicators up to the second
   * one are filled with a blank, so each indicator attribute is written exactly once.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   *
   * @param designator the data field; {@code null} is ignored
   */
  @Override
  public void beginDataField(Field designator) {
    if (designator == null) {
      return;
    }
    try {
      if (designator.isControlField()) {
        beginControlField(designator);
        endControlField(designator);
        return;
      }
      if (datafieldOpen) {
        return;
      }
      AttributesImpl attrs = new AttributesImpl();
      String tag = designator.tag();
      if (tag == null || tag.length() == 0) {
        tag = Field.NULL_TAG; // fallback
        designator.tag(tag);
      }
      attrs.addAttribute(nsUri, TAG, TAG, "CDATA", tag);
      String indicators = designator.indicator();
      int ind = indicators != null ? indicators.length() : 0;
      // set the indicators that are present
      for (int i = 1; i <= ind; i++) {
        attrs.addAttribute(null, IND + i, IND + i, "CDATA", indicators.substring(i - 1, i));
      }
      // force at least two indicators if schema is Marc 21: pad only the missing ones with a
      // blank, starting after the last present indicator to avoid duplicate attributes
      if ("MARC21".equalsIgnoreCase(schema)) {
        for (int i = ind + 1; i <= 2; i++) {
          attrs.addAttribute(null, IND + i, IND + i, "CDATA", " ");
        }
      }
      if (contentHandler != null) {
        contentHandler.startElement(nsUri, DATAFIELD, DATAFIELD, attrs);
      }
      if (listener != null) {
        listener.beginDataField(designator);
      }
      datafieldOpen = true;
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      } else if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Closes the open data field element and notifies the listener. Does nothing if no data field is
   * open.
   *
   * <p>If the field itself carries non-empty data, the normalized data is written into a subfield
   * with code {@code a} before the data field is closed. The element is closed in the same
   * namespace it was opened in.
   *
   * <p>Exceptions are rethrown as {@link RuntimeException} if fatal errors are enabled, otherwise
   * logged unless silenced.
   *
   * @param designator the data field, may be {@code null} to force closing without data
   */
  @Override
  public void endDataField(Field designator) {
    try {
      if (!datafieldOpen) {
        return;
      }
      if (listener != null) {
        listener.endDataField(designator);
      }
      if (designator != null) {
        String value = designator.data();
        if (value != null && !value.isEmpty()) {
          value = normalizeValue(value);
          // write data field per default into a subfield with code 'a'
          AttributesImpl attrs = new AttributesImpl();
          attrs.addAttribute(nsUri, CODE, CODE, "CDATA", "a");
          if (contentHandler != null) {
            contentHandler.startElement(nsUri, SUBFIELD, SUBFIELD, attrs);
            contentHandler.characters(value.toCharArray(), 0, value.length());
            contentHandler.endElement(nsUri, SUBFIELD, SUBFIELD);
          }
        }
      }
      if (contentHandler != null) {
        // must use the active namespace (nsUri), which differs from NS_URI for MARC21
        contentHandler.endElement(nsUri, DATAFIELD, DATAFIELD);
      }
      datafieldOpen = false;
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      } else if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Opens a subfield element carrying the subfield code and notifies the listener.
   *
   * <p>A missing or empty subfield id is written as code {@code a}. Exceptions are rethrown as
   * {@link RuntimeException} if fatal errors are enabled, otherwise logged unless silenced.
   *
   * @param designator the subfield; {@code null} is ignored
   */
  @Override
  public void beginSubField(Field designator) {
    if (designator == null) {
      return;
    }
    try {
      final String declaredCode = designator.subfieldId();
      // fallback to "a" when the field declares no code
      final String code = (declaredCode != null && declaredCode.length() != 0) ? declaredCode : "a";
      AttributesImpl subfieldAttributes = new AttributesImpl();
      subfieldAttributes.addAttribute(nsUri, CODE, CODE, "CDATA", code);
      if (contentHandler != null) {
        contentHandler.startElement(nsUri, SUBFIELD, SUBFIELD, subfieldAttributes);
      }
      if (listener != null) {
        listener.beginSubField(designator);
      }
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      }
      if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Notifies the listener, writes the normalized subfield data as character content and closes the
   * subfield element in the same namespace it was opened in.
   *
   * <p>A subfield without data is closed without content. Exceptions are rethrown as
   * {@link RuntimeException} if fatal errors are enabled, otherwise logged unless silenced.
   *
   * @param designator the subfield; {@code null} is ignored
   */
  @Override
  public void endSubField(Field designator) {
    if (designator == null) {
      return;
    }
    try {
      if (listener != null) {
        listener.endSubField(designator);
      }
      if (contentHandler != null) {
        String value = designator.data();
        // null-safe like endDataField: a missing value must not prevent the end element below
        if (value != null && !value.isEmpty()) {
          value = normalizeValue(value);
          contentHandler.characters(value.toCharArray(), 0, value.length());
        }
        // must use the active namespace (nsUri), which differs from NS_URI for MARC21
        contentHandler.endElement(nsUri, SUBFIELD, SUBFIELD);
      }
    } catch (Exception ex) {
      if (fatalerrors) {
        throw new RuntimeException(ex);
      } else if (!silenterrors) {
        logger.warn(designator + ": " + ex.getMessage(), ex);
      }
    }
  }

  /**
   * Prepares field data for XML output: the value is brought into Unicode normalization form NFC
   * and then passed through {@code XMLUtil.clean}.
   *
   * @param value the raw field data, must not be {@code null}
   * @return the normalized and cleaned value
   */
  protected String normalizeValue(String value) {
    final String composed = Normalizer.normalize(value, Form.NFC);
    return XMLUtil.clean(composed);
  }

  /**
   * Stream listener that translates the chunks and separator marks of the character stream into
   * record, field and subfield events of the enclosing adapter.
   *
   * <p>The listener keeps its state in fields of the enclosing adapter: {@code mark} holds the
   * separator signalled last, {@code position} the running position used to look up fields in the
   * directory. Each {@code mark*()} call records the separator and advances the position by one;
   * each {@code data()} call interprets its chunk according to the recorded separator.
   */
  private class Iso2709StreamListener implements CharStreamListener {

    /**
     * Handles one chunk of data according to the separator that preceded it.
     *
     * <p>An {@code InvalidFieldDirectoryException} is only logged. In every case the position is
     * advanced by the length of the chunk afterwards.
     *
     * @param data the chunk delivered by the stream
     */
    @Override
    public void data(String data) {
      String fieldContent = data;
      try {
        switch (mark) {
          case Separable.FS: // start/end file
            break;
          case Separable.GS: // start/end of group within a stream
            if (subfieldOpen) { // close subfield if open
              subfieldOpen = false;
              endDataField(null);
            }
            endDataField(designator);
            endRecord(); // close record
            // fall through is ok! The chunk after a group separator starts the next record.
          case '\u0000': // start of stream
            position = 0;
            // skip line-feed (OCLC PICA quirk)
            // NOTE(review): charAt(0) throws on an empty chunk - confirm the stream never
            // delivers one here
            if (data.charAt(0) == '\n') {
              fieldContent = data.substring(1);
            }
            if (fieldContent.length() >= RecordLabel.LENGTH) {
              // chunk is long enough to start with a record label
              beginRecord(format, type);
              String labelStr = fieldContent.substring(0, RecordLabel.LENGTH);
              label = new RecordLabel(labelStr.toCharArray());
              // auto-repair label
              leader(label.getFixed());
              directory = new FieldDirectory(label, fieldContent);
              if (directory.isEmpty()) {
                // no directory: treat the rest of the chunk after the label as a field
                designator = new Field(label, fieldContent.substring(RecordLabel.LENGTH));
                if (designator.tag() != null) {
                  beginDataField(designator);
                }
              }
            } else {
              // chunk too short for a label; no record is begun
              // NOTE(review): label is still null here if this is the first chunk - confirm
              // FieldDirectory accepts that
              directory = new FieldDirectory(label, fieldContent);
              designator = new Field();
            }
            break;
          case Separable.RS:
            // a new field starts: first close whatever data field is still open
            if (subfieldOpen) {
              subfieldOpen = false;
              endDataField(null); // force data field close
            } else if (designator != null && !designator.isEmpty()) {
              if (datafieldOpen) {
                endDataField(designator);
              }
            }
            // then build the new field, using the directory entry at the current position if
            // a directory exists
            if (directory == null || directory.isEmpty()) {
              designator = new Field(label, fieldContent);
            } else if (directory.containsKey(position)) {
              designator = new Field(label, directory.get(position), fieldContent, false);
            } else {
              throw new InvalidFieldDirectoryException(
                  "byte position not found in directory: "
                      + position
                      + " - is this stream reading using an 8-bit wide encoding?");
            }
            if (designator != null) {
              beginDataField(designator);
            }
            break;
          case Separable.US:
            // a subfield starts; open the data field on the first subfield
            if (!subfieldOpen) {
              subfieldOpen = true;
              beginDataField(designator);
            }
            if (designator != null) {
              // derive the subfield from the current field and this chunk
              designator = new Field(label, designator, fieldContent, true);
              beginSubField(designator);
            }
            endSubField(designator);
            break;
        }
      } catch (InvalidFieldDirectoryException ex) {
        // logged only; processing continues with the next chunk
        logger.warn(ex.getMessage());
      } finally {
        position += data.length();
      }
    }

    /** Records a unit separator (subfield start) and advances the position by one. */
    @Override
    public void markUnit() {
      mark = Separable.US;
      position++;
    }

    /** Records a record separator (field start) and advances the position by one. */
    @Override
    public void markRecord() {
      mark = Separable.RS;
      position++;
    }

    /** Records a group separator (end of a record) and advances the position by one. */
    @Override
    public void markGroup() {
      mark = Separable.GS;
      position++;
    }

    /**
     * Records a file separator, advances the position by one and closes any data field and record
     * that is still open.
     */
    @Override
    public void markFile() {
      mark = Separable.FS;
      position++;
      endDataField(null);
      endRecord();
    }
  }
}