private Merger collectMerges(Table mergingTable, DatabaseTable toBeMerged, Connection connection) throws AlgorithmExecutionException { Merger merger = new Merger(toBeMerged, monitor); try { String[] primaryKeyColumns = toBeMerged.getPrimaryKeyColumns(connection); ColumnProjection primaryKeyColumnFilter = new NamedColumnProjection(primaryKeyColumns, true); ForeignKey[] foreignKeys = toBeMerged.getRelations(connection); for (ForeignKey foreignKey : foreignKeys) { // merge units are the units of work that will repoint the foreign keys referring to the // entities merged away to point at the primary entities merger.addMergeUnit(new MergeUnit(foreignKey)); } TableIterator merges = mergingTable.iterator( mergingTable.rowsSortedBy(CreateMergingTable.MERGE_GROUP_IDENTIFIER_COLUMN, true)); while (merges.hasNext()) { int row = merges.nextInt(); Tuple tuple = mergingTable.getTuple(row); String groupIdentifier = tuple.getString(CreateMergingTable.MERGE_GROUP_IDENTIFIER_COLUMN); // for every key someone used for a merge group, there's an EntityGroup EntityGroup group = merger.getOrCreateEntityGroup( groupIdentifier, toBeMerged, primaryKeyColumnFilter, foreignKeys); try { group.addRecord(tuple); } catch (MergingErrorException e) { problems.add(e.getMessage()); } } return merger; } catch (SQLException e) { throw new AlgorithmExecutionException("There was a problem creating the output data.", e); } }
/**
 * Geocodes the values of one column and returns a new table holding the original data
 * plus two new columns for latitude and longitude.
 *
 * <p>For every row produced by {@code originalTable.iterator()} the value in
 * {@code locationColumnName} is upper-cased and looked up, first with
 * {@code geocoder.geocodingFullForm} and, if that throws a {@link GeoCoderException},
 * with {@code geocoder.geocodingAbbreviation}. Results (including failures) are cached per
 * upper-cased location so the same place is never geocoded twice. Rows whose location is
 * {@code null} or cannot be geocoded receive the coordinates of
 * {@code DEFAULT_NO_LOCATION_VALUE} and are counted as failures.
 *
 * <p>After all rows are processed, a warning listing the failed locations is emitted via
 * {@code printWarningMessage} (if there were any) and a success statistic is logged.
 *
 * @param locationColumnName name of the column in {@code originalTable} holding the places
 * @param originalTable table to read from; it is not modified by this method
 * @param logger log service that receives the informational and warning messages
 * @param geocoder geocoder used for the full-form and abbreviation lookups
 * @return a new table with the original columns followed by a latitude and a longitude
 *         column (both {@code Double})
 */
public static Table compute(
        String locationColumnName, Table originalTable, LogService logger, Geocoder geocoder) {
    /* Create a blank output table using the schema of the original table. */
    Table outputTable = originalTable.getSchema().instantiate();
    String outputTableLatitudeColumnName = TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LATITUDE_COLUMN_NAME_SUGGESTIONS);
    String outputTableLongitudeColumnName = TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LONGITUDE_COLUMN_NAME_SUGGESTIONS);

    outputTable.addColumn(outputTableLatitudeColumnName, Double.class);
    outputTable.addColumn(outputTableLongitudeColumnName, Double.class);

    logger.log(
            LogService.LOG_INFO,
            String.format(
                    "Latitude & Longitude values added to %s & %s, respectively.",
                    outputTableLatitudeColumnName,
                    outputTableLongitudeColumnName));

    int locationColumnNumber = originalTable.getColumnNumber(locationColumnName);
    int latitudeColumnNumber = outputTable.getColumnNumber(outputTableLatitudeColumnName);
    int longitudeColumnNumber = outputTable.getColumnNumber(outputTableLongitudeColumnName);

    /* Cache of upper-cased location -> result, including failed lookups. */
    Map<String, Geolocation> geocodedAddressToGeoLocation = new HashMap<String, Geolocation>();
    FrequencyMap<String> failedFrequency = new FrequencyMap<String>(true);

    Iterator<?> locationColumnIterator = originalTable.iterator();
    while (locationColumnIterator.hasNext()) {
        int currentRowNumber = Integer.parseInt(locationColumnIterator.next().toString());

        /* Start geocoding */
        Geolocation geolocation = DEFAULT_NO_LOCATION_VALUE;
        String currentLocation = "";
        Object currentLocationObject = originalTable.get(currentRowNumber, locationColumnNumber);

        if (currentLocationObject != null) {
            currentLocation = currentLocationObject.toString();
            // NOTE(review): toUpperCase() uses the default locale; confirm this matches
            // how the geocoder normalizes its own keys before switching to a fixed locale.
            String currentLocationUppercase = currentLocation.toUpperCase();

            /* Avoid re-geocoding the same place */
            if (geocodedAddressToGeoLocation.containsKey(currentLocationUppercase)) {
                geolocation = geocodedAddressToGeoLocation.get(currentLocationUppercase);

                /* A cached failure still counts as a failure for this row. */
                if (geolocation == DEFAULT_NO_LOCATION_VALUE) {
                    failedFrequency.add(currentLocation);
                }
            } else {
                try {
                    geolocation = geocoder.geocodingFullForm(currentLocationUppercase);
                } catch (GeoCoderException e) {
                    try {
                        /* Try lookup in the abbreviation */
                        geolocation = geocoder.geocodingAbbreviation(currentLocationUppercase);
                    } catch (GeoCoderException e1) {
                        /* No result is found */
                        failedFrequency.add(currentLocation);
                    }
                }

                /* Add to geocoded map */
                geocodedAddressToGeoLocation.put(currentLocationUppercase, geolocation);
            }
        } else {
            failedFrequency.add(currentLocation);
        }

        /*
         * Add the new row to the new table by copying the original row and then filling
         * in the 2 new columns. The row index returned by addRow() is used for the output
         * table: it is only guaranteed to equal the source row index when the source
         * table has no gaps in its valid rows.
         */
        int outputRowNumber = outputTable.addRow();
        TableUtilities.copyTableRow(
                outputRowNumber, currentRowNumber, outputTable, originalTable);
        outputTable.set(outputRowNumber, latitudeColumnNumber, geolocation.getLatitude());
        outputTable.set(outputRowNumber, longitudeColumnNumber, geolocation.getLongitude());
    }

    /* Warning user about failure */
    if (!failedFrequency.isEmpty()) {
        printWarningMessage(logger, locationColumnName, failedFrequency);
    }

    /* Show statistic information */
    int totalRow = originalTable.getRowCount();
    NumberFormat numberFormat = NumberFormat.getInstance();
    logger.log(
            LogService.LOG_INFO,
            String.format(
                    "Successfully geocoded %s out of %s locations to geographic coordinates",
                    numberFormat.format(totalRow - failedFrequency.sum()),
                    numberFormat.format(totalRow)));

    return outputTable;
}