Exemple #1
0
  /**
   * Builds the {@link Merger} describing every merge requested in {@code mergingTable}.
   *
   * <p>A {@link MergeUnit} is registered for each foreign key reported by
   * {@code toBeMerged.getRelations(connection)}. The rows of the merging table are then visited
   * in the order produced by sorting on the merge group identifier column, and each row is added
   * to the {@link EntityGroup} keyed by that row's group identifier. A row rejected with a
   * {@link MergingErrorException} does not abort the run: its message is appended to
   * {@code problems} and the next row is processed.
   *
   * @param mergingTable table whose rows carry a merge group identifier
   * @param toBeMerged database table whose entities are being merged
   * @param connection connection used to read the primary key columns and relations
   * @return the merger holding the merge units and entity groups that were collected
   * @throws AlgorithmExecutionException if a {@link SQLException} is raised while collecting
   */
  private Merger collectMerges(Table mergingTable, DatabaseTable toBeMerged, Connection connection)
      throws AlgorithmExecutionException {

    Merger result = new Merger(toBeMerged, monitor);
    try {
      ColumnProjection keyProjection =
          new NamedColumnProjection(toBeMerged.getPrimaryKeyColumns(connection), true);

      // Each merge unit is the piece of work that repoints one foreign key from the entities
      // merged away to the primary entities.
      ForeignKey[] relations = toBeMerged.getRelations(connection);
      for (int i = 0; i < relations.length; i++) {
        result.addMergeUnit(new MergeUnit(relations[i]));
      }

      String groupColumn = CreateMergingTable.MERGE_GROUP_IDENTIFIER_COLUMN;
      for (TableIterator rows = mergingTable.iterator(mergingTable.rowsSortedBy(groupColumn, true));
          rows.hasNext(); ) {
        Tuple entry = mergingTable.getTuple(rows.nextInt());

        // Every distinct group identifier found in the table maps to one EntityGroup.
        EntityGroup entityGroup =
            result.getOrCreateEntityGroup(
                entry.getString(groupColumn), toBeMerged, keyProjection, relations);
        try {
          entityGroup.addRecord(entry);
        } catch (MergingErrorException mergingError) {
          // Recorded rather than rethrown so the remaining rows are still collected.
          problems.add(mergingError.getMessage());
        }
      }

      return result;

    } catch (SQLException sqlError) {
      throw new AlgorithmExecutionException(
          "There was a problem creating the output data.", sqlError);
    }
  }
Exemple #2
0
  /**
   * Geocodes one column of {@code originalTable} and returns a new table holding the original
   * data plus two additional columns for latitude and longitude.
   *
   * <p>For every row the value of the location column is upper-cased and looked up with
   * {@code geocoder.geocodingFullForm}; if that throws {@link GeoCoderException} the lookup is
   * retried with {@code geocoder.geocodingAbbreviation}. Results (including failures) are cached
   * per upper-cased value so the same place is not geocoded twice. Rows whose location is
   * {@code null} or cannot be geocoded receive the coordinates of
   * {@code DEFAULT_NO_LOCATION_VALUE} and are counted as failures.
   *
   * <p>Two informational messages are logged (the names of the new columns, and the success
   * count); a warning is emitted through {@code printWarningMessage} when any lookup failed.
   *
   * @param locationColumnName name of the column in {@code originalTable} holding the locations
   * @param originalTable table to read from; it is not modified by this method
   * @param logger log service that receives the informational and warning messages
   * @param geocoder geocoder used for the full-form and abbreviation lookups
   * @return a new table with the original columns and the added latitude/longitude columns
   */
  public static Table compute(
      String locationColumnName, Table originalTable, LogService logger, Geocoder geocoder) {
    /*
     * Create a blank output table using the schema of the original table, then append the two
     * coordinate columns under names that do not clash with existing columns.
     */
    Table outputTable = originalTable.getSchema().instantiate();
    String outputTableLatitudeColumnName =
        TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LATITUDE_COLUMN_NAME_SUGGESTIONS);
    String outputTableLongitudeColumnName =
        TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LONGITUDE_COLUMN_NAME_SUGGESTIONS);

    outputTable.addColumn(outputTableLatitudeColumnName, Double.class);
    outputTable.addColumn(outputTableLongitudeColumnName, Double.class);

    logger.log(
        LogService.LOG_INFO,
        String.format(
            "Latitude & Longitude values added to %s & %s, respectively.",
            outputTableLatitudeColumnName, outputTableLongitudeColumnName));

    int locationColumnNumber = originalTable.getColumnNumber(locationColumnName);
    int latitudeColumnNumber = outputTable.getColumnNumber(outputTableLatitudeColumnName);
    int longitudeColumnNumber = outputTable.getColumnNumber(outputTableLongitudeColumnName);
    Map<String, Geolocation> geocodedAddressToGeoLocation = new HashMap<String, Geolocation>();
    FrequencyMap<String> failedFrequency = new FrequencyMap<String>(true);
    Iterator<?> locationColumnIterator = originalTable.iterator();
    while (locationColumnIterator.hasNext()) {
      /* The table iterator yields row numbers of the original table. */
      int currentRowNumber = Integer.parseInt(locationColumnIterator.next().toString());

      /* Start geocoding */
      Geolocation geolocation = DEFAULT_NO_LOCATION_VALUE;
      String currentLocation = "";
      Object currentLocationObject = originalTable.get(currentRowNumber, locationColumnNumber);
      if (currentLocationObject != null) {
        currentLocation = currentLocationObject.toString();
        /*
         * Upper-case with a fixed locale: the default-locale variant maps "i" to a dotted
         * capital under Turkish/Azeri locales, which would change the lookup key.
         */
        String currentLocationUppercase = currentLocation.toUpperCase(java.util.Locale.ROOT);

        /* Avoid re-geocoding the same place */
        if (geocodedAddressToGeoLocation.containsKey(currentLocationUppercase)) {
          geolocation = geocodedAddressToGeoLocation.get(currentLocationUppercase);
          /* A cached sentinel means this place already failed once; count it again. */
          if (geolocation == DEFAULT_NO_LOCATION_VALUE) {
            failedFrequency.add(currentLocation);
          }
        } else {
          try {
            geolocation = geocoder.geocodingFullForm(currentLocationUppercase);
          } catch (GeoCoderException e) {
            try {
              /* Try lookup in the abbreviation */
              geolocation = geocoder.geocodingAbbreviation(currentLocationUppercase);
            } catch (GeoCoderException e1) {
              /* No result is found; geolocation keeps the no-location default. */
              failedFrequency.add(currentLocation);
            }
          }

          /* Add to geocoded map (failures are cached too, as the default value). */
          geocodedAddressToGeoLocation.put(currentLocationUppercase, geolocation);
        }
      } else {
        /* A missing location is counted as a failure under the empty string. */
        failedFrequency.add(currentLocation);
      }

      /*
       * Add the new row to the new table by copying the original row and then filling in the
       * two new columns. The index returned by addRow() is used as the target so the output
       * stays correct even when the original row numbers are not exactly 0..n-1.
       * NOTE(review): assumes copyTableRow takes (targetRow, sourceRow, targetTable,
       * sourceTable), matching the table argument order used here -- confirm.
       */
      int outputRowNumber = outputTable.addRow();
      TableUtilities.copyTableRow(outputRowNumber, currentRowNumber, outputTable, originalTable);
      outputTable.set(outputRowNumber, latitudeColumnNumber, geolocation.getLatitude());
      outputTable.set(outputRowNumber, longitudeColumnNumber, geolocation.getLongitude());
    }

    /* Warning user about failure */
    if (!failedFrequency.isEmpty()) {
      printWarningMessage(logger, locationColumnName, failedFrequency);
    }

    /* Show statistic information: successes are all rows minus the recorded failures. */
    int totalRow = originalTable.getRowCount();
    NumberFormat numberFormat = NumberFormat.getInstance();
    logger.log(
        LogService.LOG_INFO,
        String.format(
            "Successfully geocoded %s out of %s locations to geographic coordinates",
            numberFormat.format(totalRow - failedFrequency.sum()), numberFormat.format(totalRow)));
    return outputTable;
  }