Java HeaderParser примеры использования

Язык программирования: Java

Пространство имен/Пакет: java.io

Класс/Тип: HeaderParser

Примеров на hotexamples.com: 2

Java HeaderParser — найдено 2 примера. Это лучшие примеры кода на Java для java.io.HeaderParser, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

close(1)

processingHeaderBlock(1)

Пример #1

Показать файл

Файл: FullTextParser.java Проект: snpts/grobid

 /**
  * Closes this parser together with the header and citation parsers it holds.
  *
  * <p>Either sub-parser reference may be {@code null} (never created, or already released by an
  * earlier call), so each one is checked before it is closed. This makes the method safe to call
  * more than once. The nested {@code try/finally} blocks ensure that a failure while closing one
  * resource does not prevent the remaining ones from being closed and their references cleared.
  *
  * @throws IOException if closing the superclass resources fails
  */
 @Override
 public void close() throws IOException {
   try {
     super.close();
   } finally {
     try {
       if (headerParser != null) {
         headerParser.close();
       }
     } finally {
       // Drop the reference even if close() failed, so a retry does not close it twice.
       headerParser = null;
       try {
         if (citationParser != null) {
           citationParser.close();
         }
       } finally {
         citationParser = null;
       }
     }
   }
 }

Пример #2

Показать файл

Файл: FullTextParser.java Проект: snpts/grobid

  /**
   * Runs the full-text pipeline on a single PDF file and returns the result of {@code toTEI}.
   *
   * <p>The steps are: validate the input file and temp directory, convert the PDF to XML, run the
   * tagger over the featured full text, segment the document, parse the header, identify the
   * language, parse each citation, and finally combine everything via {@code toTEI}.
   *
   * <p>The temporary XML file is cleaned up in a {@code finally} block whether or not processing
   * succeeds.
   *
   * @param input path of the PDF file to process; must not be {@code null} and must exist
   * @param consolidateHeader forwarded to {@code HeaderParser.processingHeaderBlock}
   * @param consolidateCitations forwarded to {@code CitationParser.processing} for every citation
   * @return the string produced by {@code toTEI} for the processed document
   * @throws GrobidResourceException if {@code input} is {@code null}, the input file does not
   *     exist, or the temp path is {@code null} or does not exist
   * @throws GrobidException wrapping any exception raised once processing has started
   */
  public String processing2(String input, boolean consolidateHeader, boolean consolidateCitations)
      throws Exception {
    if (input == null) {
      throw new GrobidResourceException("Cannot process pdf file, because input file was null.");
    }
    File inputFile = new File(input);
    if (!inputFile.exists()) {
      throw new GrobidResourceException(
          "Cannot process pdf file, because input file '"
              + inputFile.getAbsolutePath()
              + "' does not exists.");
    }
    if (tmpPath == null) {
      throw new GrobidResourceException("Cannot process pdf file, because temp path is null.");
    }
    if (!tmpPath.exists()) {
      throw new GrobidResourceException(
          "Cannot process pdf file, because temp path '"
              + tmpPath.getAbsolutePath()
              + "' does not exists.");
    }
    doc = new Document(input, tmpPath.getAbsolutePath());
    try {
      int startPage = -1;
      int endPage = -1;
      // Per the original author's notes: run with a timeout, do not force PDF reloading, pass the
      // PDF absolute path and the temp directory for the intermediate XML file, and process the
      // images found in the PDF.
      pathXML =
          doc.pdf2xml(true, false, startPage, endPage, input, tmpPath.getAbsolutePath(), true);
      if (pathXML == null) {
        throw new Exception("PDF parsing fails");
      }
      doc.setPathXML(pathXML);
      ArrayList<String> tokenizations = doc.addFeaturesDocument();

      if (doc.getBlocks() == null) {
        throw new Exception("PDF parsing resulted in empty content");
      }

      String fulltext = doc.getFulltextFeatured(true, true);

      StringTokenizer st = new StringTokenizer(fulltext, "\n");
      feedTaggerAndParse(st);

      // Serialize the tagger output: one row per token, tab-separated features then the label.
      StringBuilder res = new StringBuilder();
      for (int i = 0; i < tagger.size(); i++) {
        for (int j = 0; j < tagger.xsize(); j++) {
          res.append(tagger.x(i, j)).append("\t");
        }
        res.append(tagger.y2(i));
        res.append("\n");
      }

      // buffer for the fulltext block
      String rese = res.toString();

      // set the different sections of the Document object
      doc = BasicStructureBuilder.resultSegmentation(doc, rese, tokenizations);

      // header processing (the parser is created on first use)
      if (headerParser == null) {
        headerParser = new HeaderParser();
      }
      resHeader = new BiblioItem();
      headerParser.processingHeaderBlock(consolidateHeader, doc, resHeader);

      // The language identification is normally done during the header parsing, but only
      // based on header information; here it is redone on title + body.
      // A null title or body must not be concatenated directly: Java would render it as the
      // literal text "null" and feed that word to the language identifier.
      String title = resHeader.getTitle();
      String body = doc.getBody();
      StringBuilder languageInput = new StringBuilder();
      if (title != null) {
        languageInput.append(title);
      }
      languageInput.append("\n");
      if (body != null) {
        languageInput.append(body);
      }
      Language langu = languageUtilities.runLanguageId(languageInput.toString());
      if (langu != null) {
        String lang = langu.getLangId();
        doc.setLanguage(lang);
        resHeader.setLanguage(lang);
      }

      // citation processing (the parser is created on first use)
      if (citationParser == null) {
        citationParser = new CitationParser();
      }
      ArrayList<BibDataSet> resCitations = doc.bibDataSets;

      if (resCitations != null) {
        for (BibDataSet bds : resCitations) {
          // Normalize the reference marker by stripping dots and spaces.
          String marker = bds.getRefSymbol();
          if (marker != null) {
            marker = marker.replace(".", "");
            marker = marker.replace(" ", "");
            bds.setRefSymbol(marker);
          }
          BiblioItem bib = citationParser.processing(bds.getRawBib(), consolidateCitations);
          bds.setResBib(bib);
        }
      }

      // final combination
      return toTEI(doc, rese, tokenizations, resHeader, false, null, false);
    } catch (Exception e) {
      throw new GrobidException("An exception occured while running Grobid.", e);
    } finally {
      // keep it clean when leaving...
      doc.cleanLxmlFile(pathXML, false);
    }
  }