예제 #1
0
 /**
  * Flattens the zones returned by {@code asZones()} into a single list of text lines.
  *
  * @return a newly created list of constituent text lines, gathered zone by zone. The list holds
  *     the logical reading-order if a ReadingOrderResolver was run on the original document.
  */
 public List<BxLine> asLines() {
   final List<BxLine> allLines = new ArrayList<BxLine>();
   for (final BxZone currentZone : asZones()) {
     // Lines are added in the order each zone reports them.
     allLines.addAll(currentZone.getLines());
   }
   return allLines;
 }
 /**
  * Builds a {@code Zone} element for the given zone and attaches it to {@code parent}.
  *
  * <p>The element receives, in order: the zone id ({@code ZoneID}), its bounds
  * ({@code ZoneCorners}), the id of the next zone ({@code ZoneNext}), a {@code ZoneInsets} element
  * whose four attributes are empty strings, a {@code ZoneLines} property with an empty value, a
  * classification derived from the zone label (only when the label is non-null), and finally one
  * child per line of the zone. The element is appended to {@code parent} only after all of this
  * succeeded.
  *
  * @param doc document used to create the new elements
  * @param parent element that the finished {@code Zone} element is appended to
  * @param zone zone to serialize
  * @param hints passed through unchanged to {@code appendBounds} and {@code appendLine}
  * @throws TransformationException if the zone has a label for which {@code ZONE_LABEL_MAP} holds
  *     no mapping or an empty one
  */
 private void appendZone(Document doc, Element parent, BxZone zone, Object... hints)
     throws TransformationException {
   Element node = doc.createElement("Zone");
   appendPropertyIfNotNull(doc, node, "ZoneID", zone.getId());
   appendBounds(doc, node, "ZoneCorners", zone.getBounds(), hints);
   appendPropertyIfNotNull(doc, node, "ZoneNext", zone.getNextId());

   // The insets element is always written, with all four attributes left blank.
   Element insetsNode = doc.createElement("ZoneInsets");
   insetsNode.setAttribute("Top", "");
   insetsNode.setAttribute("Bottom", "");
   insetsNode.setAttribute("Left", "");
   insetsNode.setAttribute("Right", "");
   node.appendChild(insetsNode);

   appendProperty(doc, node, "ZoneLines", "");

   if (zone.getLabel() != null) {
     // Look the label up once instead of repeating the map access for every check.
     String labelName = ZONE_LABEL_MAP.get(zone.getLabel());
     if (labelName == null || labelName.isEmpty()) {
       throw new TransformationException("Writing down an unknown zone label: " + zone.getLabel());
     }
     // Locale.ROOT keeps the written classification independent of the JVM default locale
     // (e.g. a Turkish default locale would otherwise upper-case 'i' to a dotted capital).
     appendClassification(doc, node, labelName.toUpperCase(java.util.Locale.ROOT), "");
   }

   for (BxLine line : zone.getLines()) {
     appendLine(doc, node, line, hints);
   }
   parent.appendChild(node);
 }
 /**
  * Computes how much of the zone's area is not covered by its chunks.
  *
  * <p>The areas of all chunks (over every word of every line in the zone) are summed and
  * subtracted from the zone's own area. A negative difference is reported as {@code 0.0}.
  *
  * @param zone zone whose lines, words and chunks are measured
  * @param page not used by this computation
  * @return the zone area minus the summed chunk areas, or {@code 0.0} if that difference is
  *     negative
  */
 @Override
 public double calculateFeatureValue(BxZone zone, BxPage page) {
   double occupiedArea = 0.0;
   for (BxLine textLine : zone.getLines()) {
     for (BxWord textWord : textLine.getWords()) {
       for (BxChunk glyph : textWord.getChunks()) {
         occupiedArea += glyph.getArea();
       }
     }
   }
   final double freeArea = zone.getArea() - occupiedArea;
   // Chunk areas may add up to more than the zone area; never report a negative value.
   return freeArea < 0 ? 0.0 : freeArea;
 }