Java Line.getDistances Examples

Programming Language: Java

Namespace/Package Name: javax.xml.transform

Class/Type: Line

Method/Function: getDistances

Examples at hotexamples.com: 2

Java Line.getDistances - 2 examples found. These are the top rated real world Java examples of javax.xml.transform.Line.getDistances extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getClusters(3)

getClusterSize(3)

getAverageY1(2)

getClusterX1(2)

getClusterX2(2)

getDistances(2)

getLineNumber(2)

toString(2)

getAverageY2(1)

getCellObject(1)

getDistanceThreshold(1)

getFirstX1(1)

getLastX2(1)

getY1OfFirstWord(1)

getY1OfLastWord(1)

setLineNumber(1)

Example #1

Show file

File: Table2.java Project: Traple/TableExtraction

 /**
  * Averages the cluster distances of all data lines, position by position, and passes the result
  * to the validation object for the calculation of the column confidence.
  *
  * <p>The distances of the first line in {@code data} seed the running totals. The distances of
  * every later line are added position-wise, and each total is then divided (integer division)
  * by the number of lines in {@code data}.
  *
  * <p>If a later line has more distances than the first line, the summation is abandoned and a
  * list of zeros (one per distance of that line) is reported instead. A later line with fewer
  * distances is not treated as an error; it simply contributes to the leading positions only.
  *
  * <p>In both cases the distance threshold of the first line is passed on as the
  * cluster-certainty threshold and as the line threshold.
  */
 private void setClusterCertainties() {
   Line firstLine = data.get(0);
   // Sum into a copy. Writing the totals straight into the list returned by getDistances()
   // would overwrite the first line's own distances if that getter exposes its internal list.
   ArrayList<Integer> totalDistances = new ArrayList<Integer>(firstLine.getDistances());

   // Start at index 1: the first line already seeded the totals.
   for (int lineIndex = 1; lineIndex < data.size(); lineIndex++) {
     ArrayList<Integer> lineDistances = data.get(lineIndex).getDistances();
     for (int x = 0; x < lineDistances.size(); x++) {
       if (x >= totalDistances.size()) {
         // This line has more distances than the first one, so the columns cannot be lined up.
         LOGGER.info(
             "Found a problem during the cluster certainties. I've given the table a very low confidence");
         ArrayList<Integer> lowValidation = new ArrayList<Integer>();
         for (int i = 0; i < lineDistances.size(); i++) {
           lowValidation.add(0);
         }
         validation.setClusterCertainty(lowValidation, firstLine.getDistanceThreshold());
         validation.setLineThreshold(firstLine.getDistanceThreshold());
         return;
       }
       totalDistances.set(x, totalDistances.get(x) + lineDistances.get(x));
     }
   }

   ArrayList<Integer> averageDistances = new ArrayList<Integer>(totalDistances.size());
   for (int total : totalDistances) {
     averageDistances.add(total / data.size());
   }
   validation.setClusterCertainty(averageDistances, firstLine.getDistanceThreshold());
   validation.setLineThreshold(firstLine.getDistanceThreshold());
 }

Example #2

Show file

File: Table2.java Project: Traple/TableExtraction

  /**
   * This is the constructor of the table class. It takes its parameters and sets them as local
   * variables. It also puts the default values for the rest of the table and then starts calling
   * the other methods in this class to extract the table according to the rules of TEA.
   *
   * <p>Nothing beyond the field initialisation happens when {@code spans} is empty. Otherwise the
   * lines are created, clustered and filtered; if more than one data line remains, the columns
   * are built, the validation object is filled, the semantic framework is run and the result is
   * written to file. Progress is printed to standard output along the way.
   *
   * @param spans These are the words below the table detection from the Page class.
   * @param charLengthThreshold This is the character length threshold as calculated in the Page
   *     class.
   * @param file This is the File that was used to extract the table from. It is only used for the
   *     creation of provenance.
   * @param pageNumber The page number; it is stored on this instance.
   * @param workspace This is the workspace as specified by the user.
   * @param tableID This is the ID of the detected table. It is mainly used for the creation of the
   *     output file and for provenance.
   * @param verticalThresholdModifier The modifier from the configuration file that should be used
   *     to indicate how much space there should be between lines
   * @param horizontalThresholdModifier The modifier used for creating the threshold in horizontal
   *     partitioning.
   * @param averageLineDistance The average (vertical) distance between lines as calculated in the
   *     Page class.
   * @param debugging is true if the program is in debugging mode; a debug file is then written
   *     after the table itself.
   * @param allowedHeaderSize The amount of headers supported by the program. Implemented as a last
   *     cut-off if thresholding fails.
   * @param allowedHeaderIterations The amount of iterations that the program is allowed to run,
   *     searching for headers.
   * @throws IOException When one of the files cannot be found
   */
  public Table2(
      Elements spans,
      double charLengthThreshold,
      File file,
      int pageNumber,
      String workspace,
      int tableID,
      double verticalThresholdModifier,
      double horizontalThresholdModifier,
      double averageLineDistance,
      boolean debugging,
      int allowedHeaderSize,
      int allowedHeaderIterations)
      throws IOException {
    String debugContent = "";
    this.averageLineDistance = averageLineDistance;
    this.maxY1 = 0;
    this.spans = spans;
    this.name = "";
    this.horizontalThresholdModifier = horizontalThresholdModifier;
    this.verticalThresholdModifier = verticalThresholdModifier;
    this.validation = new Validation();
    this.validation.setAverageDistanceBetweenRows(averageLineDistance);
    this.pageNumber = pageNumber;

    if (spans.size() > 0) {
      setMaxY1();
      this.table = new ArrayList<Line>();

      createLines(charLengthThreshold);
      separateDataByCluster();
      filterLinesThatAreAboveY1();

      if (data.size() > 1) {
        System.out.println(getRawTable());
        debugContent = debugContent + getRawTable() + "\n";
        filterEmptyLines();
        findMissingData();
        findColumns();
        createColumns(charLengthThreshold);
        checkColumns();
        debugContent = debugContent + "lines with missing data: " + linesWithMissingData + "\n";
        if (linesWithMissingData != null) {
          addLinesWithMissingDataToColumns();
        }
        fillBlankCells();
      } else {
        LOGGER.info(
            "The word Table was detected but no clusters were found.\n"
                + "It was found at position: "
                + maxY1);
      }
      // The size is checked a second time on purpose: the filtering steps above run in between
      // and the "filtered out" message below depends on the size after them.
      if (data.size() > 1) {
        for (Line line : data) {
          validation.setClusterCertainty(line.getDistances(), line.getDistanceThreshold());
          validation.setLineThreshold(line.getDistanceThreshold());
        }
        LOGGER.info("Table: " + getName());
        System.out.println("In Table: " + getName());
        System.out.println(
            "~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-");
        System.out.println("Data in this table is: ");

        // Horizontal gap between each column and the column directly before it.
        ArrayList<Integer> distances = new ArrayList<Integer>();
        Column2 lastColumn = null;
        for (Column2 column : dataInColumns) {
          System.out.println(column);
          if (lastColumn != null) {
            distances.add(column.getAverageX1() - lastColumn.getAverageX2());
          }
          // Advance the reference column; without this every gap would be measured from the
          // first column instead of from the preceding one.
          lastColumn = column;
        }
        validation.setClusterCertainty(
            distances, averageLineDistance * horizontalThresholdModifier);
        if (linesWithMissingData != null && linesWithMissingData.size() > 0) {
          System.out.println(
              "~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-");
          System.out.println(
              "The following lines were detected for having missing data or it was a line that had more clusters then the rest of the table.: ");
          for (Line line : linesWithMissingData) {
            System.out.println(line);
          }
        }
        System.out.println(
            "~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-");
        if (rowSpanners.size() > 0) {
          System.out.println("Potential rowspanners: ");
          for (Line line : rowSpanners) {
            System.out.println(line);
          }
        }
        System.out.println("Validation:\n" + validation);
        System.out.println(table);
        setClusterCertainties();
        System.out.println("Checking out the semantics.");
        SemanticFramework semanticFramework =
            new SemanticFramework(
                dataInColumns,
                (averageLineDistance * verticalThresholdModifier),
                rowSpanners,
                charLengthThreshold * horizontalThresholdModifier,
                table,
                validation,
                titleAndHeaders,
                allowedHeaderSize,
                allowedHeaderIterations);
        System.out.println("Checking for false positive...");
        checkForFalsePositive();
        System.out.println("False positive: " + validation.getFalsePositive());
        LOGGER.info("False positive: " + validation.getFalsePositive());
        System.out.println();
        fillBlankCells();
        System.out.println(semanticFramework);
        System.out.println("Calculating final table statistics.");
        setTableBoundaries(semanticFramework);
        System.out.println("Now writing to file.");
        write2(workspace, file, tableID, semanticFramework);
        if (debugging) {
          writeDebugFile(debugContent, workspace, file);
        }
      } else {
        LOGGER.info("All the found data was filtered out!");
      }
    }
  }