Ejemplo n.º 1
0
  /**
   * Populates the given CAS from the next XML file handed out by {@link GlobalFileStorage}.
   *
   * <p>The file is parsed with SAX while a {@link FragmentContentHandler} fills a list of
   * {@code String[]} entries. For each entry, index 1 is appended to the document text and
   * index 0 is stored as the value of a {@link Section} annotation spanning that text. Entries
   * are separated by a blank line ({@code "\n\n"}). Finally the document language and the
   * {@link DocumentMetaData} (title, id, base URI, URI) are set from the file.
   *
   * @param aCAS the CAS to populate
   * @throws IOException declared by the overridden signature; failures raised inside this
   *     implementation are reported as {@link CollectionException}
   * @throws CollectionException if the JCas cannot be obtained, if no file is available, or if
   *     reading or parsing the file fails (the original exception is kept as the cause)
   */
  @Override
  public void getNext(CAS aCAS) throws IOException, CollectionException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      throw new CollectionException(e);
    }

    try {
      File xmlFile = GlobalFileStorage.getInstance().poll();
      if (xmlFile == null) {
        // presumably poll() yields null once the storage is drained (verify against
        // GlobalFileStorage); fail with a clear message instead of a bare NullPointerException
        throw new IllegalStateException("GlobalFileStorage returned no file to process");
      }

      System.out.println("Process file: " + xmlFile.getName());

      // NOTE(review): the parser runs with factory defaults; if the input files are not
      // trusted, consider disabling DTDs / external entities on the factory.
      SAXParserFactory spf = SAXParserFactory.newInstance();
      SAXParser sp = spf.newSAXParser();
      XMLReader xr = sp.getXMLReader();

      LinkedList<String[]> textElements = new LinkedList<>();
      FragmentContentHandler fch = new FragmentContentHandler(xr, textElements);
      xr.setContentHandler(fch);

      // try-with-resources so the file handle is released even when parsing fails
      try (FileInputStream xmlStream = new FileInputStream(xmlFile)) {
        xr.parse(new InputSource(xmlStream));
      }

      StringBuilder docText = new StringBuilder();

      for (String[] element : textElements) {
        String sectionText = element[1];

        // The separator is written *between* entries only, and the text is never trimmed
        // afterwards, so the offsets recorded below always match the final document text.
        if (docText.length() > 0) {
          docText.append("\n\n");
        }

        int start = docText.length();
        int end = start + sectionText.length();
        docText.append(sectionText);

        Section section = new Section(jcas, start, end);
        section.setValue(element[0]);
        section.addToIndexes();
      }

      jcas.setDocumentText(docText.toString());
      jcas.setDocumentLanguage(language);

      DocumentMetaData docMetaData = DocumentMetaData.create(aCAS);
      docMetaData.setDocumentTitle(xmlFile.getName());
      docMetaData.setDocumentId(xmlFile.getAbsolutePath());
      docMetaData.setDocumentBaseUri("file:" + xmlFile.getParentFile().getAbsolutePath());
      docMetaData.setDocumentUri("file:" + xmlFile.getAbsolutePath());

    } catch (Exception e) {
      // Boundary catch: every failure while reading/parsing is surfaced to the framework as a
      // CollectionException carrying the original cause.
      throw new CollectionException(e);
    }
  }
Ejemplo n.º 2
0
  /**
   * Tells whether another file is waiting to be processed.
   *
   * @return {@code true} while the {@link GlobalFileStorage} instance is not empty,
   *     {@code false} otherwise
   * @throws IOException declared by the overridden signature
   * @throws CollectionException declared by the overridden signature
   */
  @Override
  public boolean hasNext() throws IOException, CollectionException {
    final boolean storageDrained = GlobalFileStorage.getInstance().isEmpty();
    return !storageDrained;
  }