コード例 #1
0
ファイル: Utils.java プロジェクト: mkolod/pdfxtk
 /**
  * Decides whether two text segments have approximately the same font size.
  *
  * <p>The tolerance handed to {@code within} is a fraction of the average of the two font
  * sizes: 10% normally, 50% when {@code IS_OCR} is set. (An earlier variant used 25% of
  * {@code seg1}'s font size instead.)
  *
  * <p>TODO: maybe make the tolerance 10% of the smaller of the two sizes?
  *
  * @param seg1 first segment to compare
  * @param seg2 second segment to compare
  * @return the result of {@code within} for the two font sizes and the computed tolerance
  */
 public static boolean sameFontSize(TextSegment seg1, TextSegment seg2) {
   float averageSize = (seg1.getFontSize() + seg2.getFontSize()) / 2.0f;

   // OCR mode uses a much wider tolerance than the default mode.
   float toleranceFactor = IS_OCR ? 0.50f : 0.1f;
   float tolerance = averageSize * toleranceFactor;

   return within(seg1.getFontSize(), seg2.getFontSize(), tolerance);
 }
コード例 #2
0
ファイル: Utils.java プロジェクト: mkolod/pdfxtk
  /**
   * Finds the modal (most frequent) font size, in whole points, among the given text segments.
   *
   * <p>Each font size is truncated to an integer before counting. Only sizes greater than 0
   * and at most 96 are counted; {@code null} elements and empty segments are skipped. When
   * several sizes tie for the highest count, the lowest of them is returned.
   *
   * <p>NOTE(review): the original author questioned whether this method is still used at all.
   *
   * @param textBlocks collection whose elements must all be {@code TextSegment} objects
   * @return the most frequent truncated font size, or 0 if no segment was counted
   * @throws Exception if an element of the collection is not a {@code TextSegment}
   */
  private int findModalFontSize(Collection textBlocks) throws Exception {
    // TODO: create a specific exception here

    // Largest font size that is counted. The histogram needs one slot for every integer
    // value 0..maxFontSize inclusive, hence the "+ 1" (a size of exactly 96 maps to index 96).
    final int maxFontSize = 96;
    int[] count = new int[maxFontSize + 1];

    for (Object item : textBlocks) {
      if (item == null) {
        continue;
      }
      if (!(item instanceof TextSegment)) {
        throw new Exception("Objects in the collection must be of type TextSegment.");
      }

      TextSegment block = (TextSegment) item;
      // Empty text blocks must not interfere with processing, including a trailing one.
      if (block.isEmpty()) {
        continue;
      }

      float fontSize = block.getFontSize();
      if (fontSize > 0 && fontSize <= maxFontSize) {
        // The cast truncates toward zero, i.e. rounds down to the nearest integer.
        count[(int) fontSize]++;
      }
    }

    // Find the size with the highest count. The strict comparison keeps the lowest
    // size when more than one mode exists.
    int highest = 0;
    for (int n = 1; n < count.length; n++) {
      if (count[n] > count[highest]) {
        highest = n;
      }
    }

    return highest;
  }
コード例 #3
0
ファイル: Utils.java プロジェクト: mkolod/pdfxtk
 /**
  * Computes a threshold from the font sizes of two segments.
  *
  * <p>The value that {@code minimum} yields for the two font sizes (presumably the smaller
  * one; see that helper) is scaled by {@code multiple}.
  *
  * @param seg1 first segment
  * @param seg2 second segment
  * @param multiple factor applied to the value returned by {@code minimum}
  * @return {@code minimum(seg1 font size, seg2 font size) * multiple}
  */
 public static float calculateThreshold(TextSegment seg1, TextSegment seg2, float multiple) {
   float baseSize = minimum(seg1.getFontSize(), seg2.getFontSize());
   float threshold = baseSize * multiple;
   return threshold;
 }