Примеры использования BxZone.getLabel на Java

Язык программирования: Java

Пространство имен/Пакет: java.util

Класс/Тип: BxZone

Метод/Функция: getLabel

Примеров на hotexamples.com: 4

Найдено 4 примера использования BxZone.getLabel на Java. Это лучшие примеры Java-кода для java.util.BxZone.getLabel, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

getLabel(4)

getBounds(2)

addLine(1)

getId(1)

getLines(1)

getNextId(1)

setBounds(1)

setLabel(1)

Пример #1

Показать файл

Файл: BxDocumentToTrueVizWriter.java Проект: pszostek/CERMINE

 /**
  * Serializes one zone as a {@code Zone} element and attaches it to {@code parent}.
  *
  * <p>The element receives, in this order: the zone id, the zone corners, the id of the next
  * zone, a {@code ZoneInsets} element whose four attributes are empty strings, an empty
  * {@code ZoneLines} property, a classification (only when the zone has a label) and one child
  * per line of the zone. The node is appended to {@code parent} last, so nothing is attached
  * when an exception is thrown while it is being built.
  *
  * @param doc document used to create the new elements
  * @param parent element that receives the finished {@code Zone} node
  * @param zone zone to write
  * @param hints passed through unchanged to {@code appendBounds} and {@code appendLine}
  * @throws TransformationException if the zone has a label for which {@code ZONE_LABEL_MAP}
  *     holds no non-empty name
  */
 private void appendZone(Document doc, Element parent, BxZone zone, Object... hints)
     throws TransformationException {
   Element node = doc.createElement("Zone");
   appendPropertyIfNotNull(doc, node, "ZoneID", zone.getId());
   appendBounds(doc, node, "ZoneCorners", zone.getBounds(), hints);
   appendPropertyIfNotNull(doc, node, "ZoneNext", zone.getNextId());
   Element insetsNode = doc.createElement("ZoneInsets");
   insetsNode.setAttribute("Top", "");
   insetsNode.setAttribute("Bottom", "");
   insetsNode.setAttribute("Left", "");
   insetsNode.setAttribute("Right", "");
   node.appendChild(insetsNode);
   appendProperty(doc, node, "ZoneLines", "");
   if (zone.getLabel() != null) {
     // Look the name up once; a missing or empty entry means the label cannot be written.
     String labelName = ZONE_LABEL_MAP.get(zone.getLabel());
     if (labelName == null || labelName.isEmpty()) {
       throw new TransformationException("Writing down an unknown zone label: " + zone.getLabel());
     }
     // Locale.ROOT keeps the upper-casing independent of the JVM default locale
     // (e.g. under a Turkish locale "i" would otherwise become a dotted capital I).
     appendClassification(doc, node, labelName.toUpperCase(java.util.Locale.ROOT), "");
   }
   for (BxLine line : zone.getLines()) {
     appendLine(doc, node, line, hints);
   }
   parent.appendChild(node);
 }

Пример #2

Показать файл

Файл: SegmentationEvaluator.java Проект: mkoniari/CERMINE

  /**
   * Compares the zones of an expected page with the zones of an actual page.
   *
   * <p>Every expected zone whose label is not in {@code ignoredLabels} increments
   * {@code all}. It also increments {@code matched} when all of its chunks map to one and the
   * same entry of the chunk-to-zone map built from {@code actual}, and that entry has the same
   * chunk count as the expected zone.
   *
   * @param expected page providing the reference zones
   * @param actual page whose zones are evaluated
   * @return counters of compared and matched zones
   */
  private LevelResults compareZones(BxPage expected, BxPage actual) {
    Map<BxChunk, BxZone> zoneByChunk = BxModelUtils.mapChunksToZones(actual);
    LevelResults tally = new LevelResults();

    for (BxZone referenceZone : expected) {
      if (!ignoredLabels.contains(referenceZone.getLabel())) {
        // Collect every actual zone that holds at least one chunk of the reference zone.
        Set<BxZone> coveringZones = new HashSet<BxZone>();
        for (BxLine referenceLine : referenceZone) {
          for (BxWord referenceWord : referenceLine) {
            for (BxChunk referenceChunk : referenceWord) {
              coveringZones.add(zoneByChunk.get(referenceChunk));
            }
          }
        }

        if (coveringZones.size() == 1) {
          BxZone onlyZone = coveringZones.iterator().next();
          if (BxModelUtils.countChunks(onlyZone) == BxModelUtils.countChunks(referenceZone)) {
            tally.matched++;
          }
        }
        tally.all++;
      }
    }

    return tally;
  }

Пример #3

Показать файл

Файл: SegmentationEvaluator.java Проект: mkoniari/CERMINE

  /**
   * Compares the words of an expected page with the words of an actual page.
   *
   * <p>Zones whose label is in {@code ignoredLabels} are skipped entirely. Every word of the
   * remaining expected zones increments {@code all}. It also increments {@code matched} when all
   * of its chunks map to one and the same entry of the chunk-to-word map built from
   * {@code actual}, and that entry has the same children count as the expected word.
   *
   * @param expected page providing the reference words
   * @param actual page whose words are evaluated
   * @return counters of compared and matched words
   */
  private LevelResults compareWords(BxPage expected, BxPage actual) {
    Map<BxChunk, BxWord> wordByChunk = BxModelUtils.mapChunksToWords(actual);
    LevelResults tally = new LevelResults();

    for (BxZone referenceZone : expected) {
      if (!ignoredLabels.contains(referenceZone.getLabel())) {
        for (BxLine referenceLine : referenceZone) {
          for (BxWord referenceWord : referenceLine) {
            // Collect every actual word that holds at least one chunk of the reference word.
            Set<BxWord> coveringWords = new HashSet<BxWord>();
            for (BxChunk referenceChunk : referenceWord) {
              coveringWords.add(wordByChunk.get(referenceChunk));
            }

            if (coveringWords.size() == 1) {
              BxWord onlyWord = coveringWords.iterator().next();
              if (onlyWord.childrenCount() == referenceWord.childrenCount()) {
                tally.matched++;
              }
            }
            tally.all++;
          }
        }
      }
    }

    return tally;
  }

Пример #4

Показать файл

Файл: HeuristicContentHeadersExtractor.java Проект: mkoniari/CERMINE

  /**
   * Picks header lines from the body zones of a document and groups the remaining body lines
   * under them.
   *
   * <p>Only zones labelled {@code BODY_CONTENT} or {@code GEN_BODY} are considered; zones with
   * any other label, or with no label at all, are skipped. The method works in stages:
   *
   * <ol>
   *   <li>collect height, width, x position, line distance and font index observations for all
   *       body lines, and take lines that are first in their zone and look like a header as
   *       initial candidates;
   *   <li>drop candidates rejected by {@code shouldBeRemoved} or by the width threshold
   *       {@code candMaxLengthZScore};
   *   <li>record the font name of every group of three candidates sharing the same most popular
   *       font, provided the first line's font index score exceeds {@code outlFontZScore};
   *   <li>add every body line that looks like a header and uses a recorded font, then filter
   *       again with {@code candMaxLengthZScore2};
   *   <li>drop candidates having no similar candidate or more than
   *       {@code maxSimilarLinesCount} of them, together with the lines similar to them;
   *   <li>cluster the candidates in document order and keep only those in the first three
   *       distinct clusters encountered;
   *   <li>build the content structure and let {@code headerLinesCompletener} complete it.
   * </ol>
   *
   * @param document document to analyse
   * @return content structure with header lines and the body lines assigned to them
   * @throws AnalysisException declared by the overridden method; no statement visible in this
   *     method throws it directly
   */
  @Override
  public BxContentStructure extractHeaders(BxDocument document) throws AnalysisException {

    Population heightPopulation = new Population();
    Population fontPopulation = new Population();
    Population distancePopulation = new Population();
    Population lengthPopulation = new Population();
    Population indentationPopulation = new Population();

    // Stage 1: gather line statistics and the initial candidates.
    Set<BxLine> candidates = new HashSet<BxLine>();
    for (BxPage page : document) {
      for (BxZone zone : page) {
        if (isBodyTextZone(zone)) {
          for (BxLine line : zone) {
            heightPopulation.addObservation(line.getHeight());
            lengthPopulation.addObservation(line.getWidth());
            indentationPopulation.addObservation(line.getX());
            // Only positive distances to the previous line are recorded.
            if (line.hasPrev() && line.getY() - line.getPrev().getY() > 0) {
              distancePopulation.addObservation(line.getY() - line.getPrev().getY());
            }
            fontPopulation.addObservation(getFontIndex(line));

            if (isFirstInZone(line) && looksLikeHeader(line)) {
              candidates.add(line);
            }
          }
        }
      }
    }

    // Stage 2: first filtering pass. Removals are collected separately because the set cannot
    // be modified while it is being iterated.
    Set<BxLine> toDelete = new HashSet<BxLine>();

    for (BxLine line : candidates) {
      if (shouldBeRemoved(
          line, heightPopulation, fontPopulation, distancePopulation, indentationPopulation)) {
        toDelete.add(line);
      }
      if (lengthPopulation.getZScore(line.getWidth()) > candMaxLengthZScore) {
        toDelete.add(line);
      }
    }

    candidates.removeAll(toDelete);
    toDelete.clear();

    // Stage 3: a font counts as a header font when three candidates share it and the font index
    // score of the first of them exceeds the outlier threshold.
    Set<String> headerFonts = new HashSet<String>();
    List<BxLine> candidatesList = Lists.newArrayList(candidates);

    for (int x = 0; x < candidatesList.size(); x++) {
      BxLine line1 = candidatesList.get(x);
      for (int y = x + 1; y < candidatesList.size(); y++) {
        BxLine line2 = candidatesList.get(y);
        for (int z = y + 1; z < candidatesList.size(); z++) {
          BxLine line3 = candidatesList.get(z);
          if (line1.getMostPopularFontName().equals(line2.getMostPopularFontName())
              && line3.getMostPopularFontName().equals(line2.getMostPopularFontName())
              && Math.abs(fontPopulation.getZScore(getFontIndex(line1))) > outlFontZScore) {
            headerFonts.add(line1.getMostPopularFontName());
          }
        }
      }
    }

    // Stage 4: widen the candidate set to any header-like body line using a header font.
    for (BxPage page : document) {
      for (BxZone zone : page) {
        if (isBodyTextZone(zone)) {
          for (BxLine line : zone) {
            if (looksLikeHeader(line) && headerFonts.contains(line.getMostPopularFontName())) {
              candidates.add(line);
            }
          }
        }
      }
    }

    // Second filtering pass; it uses its own width threshold.
    for (BxLine line : candidates) {
      if (shouldBeRemoved(
          line, heightPopulation, fontPopulation, distancePopulation, indentationPopulation)) {
        toDelete.add(line);
      }
      if (lengthPopulation.getZScore(line.getWidth()) > candMaxLengthZScore2) {
        toDelete.add(line);
      }
    }

    candidates.removeAll(toDelete);
    toDelete.clear();

    // Stage 5: drop candidates with no similar candidate or with too many of them.
    for (BxLine line : candidates) {
      int i = 0;
      for (BxLine line2 : candidates) {
        if (line.equals(line2)) {
          continue;
        }
        if (areSimilar(line, line2)) {
          i++;
        }
      }
      if (i == 0 || i > maxSimilarLinesCount) {
        toDelete.add(line);
        // Everything similar to a rejected line is rejected with it.
        for (BxLine line2 : candidates) {
          if (areSimilar(line, line2)) {
            toDelete.add(line2);
          }
        }
      }
    }

    candidates.removeAll(toDelete);

    // Stage 6: list the candidates in document order before clustering them.
    candidatesList = new ArrayList<BxLine>();
    for (BxPage page : document) {
      for (BxZone zone : page) {
        for (BxLine line : zone) {
          if (candidates.contains(line)) {
            candidatesList.add(line);
          }
        }
      }
    }
    int[] clusters = headersClusterizer.clusterLines(candidatesList);
    // Only the first three distinct cluster ids encountered are kept.
    Set<Integer> keptClusters = new HashSet<Integer>();
    for (int clusterIdx = 0; clusterIdx < clusters.length; clusterIdx++) {
      int cluster = clusters[clusterIdx];
      if (keptClusters.size() < 3) {
        keptClusters.add(cluster);
      }
      if (!keptClusters.contains(cluster)) {
        candidates.remove(candidatesList.get(clusterIdx));
      }
    }

    // Stage 7: build the structure. Each non-header body line is attached to the most recent
    // header line.
    BxContentStructure contentStructure = new BxContentStructure();
    BxLine lastHeaderLine = null;
    for (BxPage page : document) {
      for (BxZone zone : page) {
        if (isBodyTextZone(zone)) {
          for (BxLine line : zone) {
            if (candidates.contains(line)) {
              contentStructure.addFirstHeaderLine(page, line);
              lastHeaderLine = line;
            } else {
              // Content seen before any header is attached to a placeholder header ("--").
              if (lastHeaderLine == null) {
                BxChunk chunk = new BxChunk(new BxBounds(), "--");
                BxWord word = new BxWord().addChunk(chunk);
                lastHeaderLine = new BxLine().addWord(word);
                contentStructure.addFirstHeaderLine(page, lastHeaderLine);
              }
              contentStructure.addContentLine(lastHeaderLine, line);
            }
          }
        }
      }
    }

    headerLinesCompletener.completeLines(contentStructure);

    return contentStructure;
  }

  /**
   * Tells whether a zone is labelled {@code BODY_CONTENT} or {@code GEN_BODY}.
   *
   * <p>The constants are used as receivers of {@code equals}, so a zone without a label yields
   * {@code false} instead of a {@link NullPointerException}.
   *
   * @param zone zone to test
   * @return {@code true} for the two body labels, {@code false} otherwise
   */
  private boolean isBodyTextZone(BxZone zone) {
    Object label = zone.getLabel();
    return BxZoneLabel.BODY_CONTENT.equals(label) || BxZoneLabel.GEN_BODY.equals(label);
  }