예제 #1
0
  /**
   * Returns a copy of {@code table} extended with a {@code STANDARDIZED_JOURNAL_NAME_COLUMN}
   * string column.
   *
   * <p>For every row whose {@code journalColumnName} value can be read as a string, that value is
   * passed to a {@link ScimapsJournalMatcher} and the result is written into the new column. Rows
   * whose journal value cannot be read as a string are not written to.
   *
   * @param table the source table; all writes go to the table returned by
   *     {@code TableUtilities.copyTable}
   * @return the copied table with the additional column
   * @throws IOException declared by the signature; presumably raised while constructing the
   *     matcher (TODO confirm)
   */
  private Table processJournalName(Table table) throws IOException {
    ScimapsJournalMatcher matcher = new ScimapsJournalMatcher();

    // Work on a copy that carries one extra column for the matcher's suggestion.
    Table result = TableUtilities.copyTable(table);
    result.addColumn(STANDARDIZED_JOURNAL_NAME_COLUMN, String.class);
    int targetColumn = result.getColumnNumber(STANDARDIZED_JOURNAL_NAME_COLUMN);

    // The position counter advances for every tuple, including skipped ones, so it stays aligned
    // with the iteration order (assumes tuples() yields rows 0..n-1 -- TODO confirm).
    int position = 0;
    for (Iterator<?> cursor = result.tuples(); cursor.hasNext(); position++) {
      Tuple current = (Tuple) cursor.next();
      if (!current.canGetString(journalColumnName)) {
        continue;
      }
      String standardized = matcher.get(current.getString(journalColumnName));
      result.setString(position, targetColumn, standardized);
    }
    return result;
  }
예제 #2
0
  /**
   * Geocodes the values of one column and returns a new table containing the original data plus
   * two new {@code Double} columns for latitude and longitude.
   *
   * <p>Each non-null location is upper-cased and looked up first with
   * {@code geocoder.geocodingFullForm} and, if that throws, with
   * {@code geocoder.geocodingAbbreviation}. Lookup results (including failures) are cached per
   * upper-cased location so the same place is geocoded only once. Rows that cannot be geocoded
   * receive the coordinates of {@code DEFAULT_NO_LOCATION_VALUE}; a warning summarising those
   * locations is logged, followed by a success-count message.
   *
   * @param locationColumnName name of the column in {@code originalTable} holding the locations
   * @param originalTable the table to read from; this method only reads from it
   * @param logger receives the informational and warning messages
   * @param geocoder performs the full-form and abbreviation lookups
   * @return a new table with the original columns and the two coordinate columns
   */
  public static Table compute(
      String locationColumnName, Table originalTable, LogService logger, Geocoder geocoder) {
    /*
     * Create a blank output table using the schema from the original table, then append the
     * coordinate columns under names that do not clash with existing columns.
     */
    Table outputTable = originalTable.getSchema().instantiate();
    String outputTableLatitudeColumnName =
        TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LATITUDE_COLUMN_NAME_SUGGESTIONS);
    String outputTableLongitudeColumnName =
        TableUtilities.formNonConflictingNewColumnName(
            originalTable.getSchema(), LONGITUDE_COLUMN_NAME_SUGGESTIONS);

    outputTable.addColumn(outputTableLatitudeColumnName, Double.class);
    outputTable.addColumn(outputTableLongitudeColumnName, Double.class);

    logger.log(
        LogService.LOG_INFO,
        String.format(
            "Latitude & Longitude values added to %s & %s, respectively.",
            outputTableLatitudeColumnName, outputTableLongitudeColumnName));

    int locationColumnNumber = originalTable.getColumnNumber(locationColumnName);
    int latitudeColumnNumber = outputTable.getColumnNumber(outputTableLatitudeColumnName);
    int longitudeColumnNumber = outputTable.getColumnNumber(outputTableLongitudeColumnName);
    Map<String, Geolocation> geocodedAddressToGeoLocation = new HashMap<String, Geolocation>();
    FrequencyMap<String> failedFrequency = new FrequencyMap<String>(true);
    Iterator<?> rowNumberIterator = originalTable.iterator();
    while (rowNumberIterator.hasNext()) {
      /* The iterator yields row numbers of the original table; they are parsed from text. */
      int originalRowNumber = Integer.parseInt(rowNumberIterator.next().toString());

      /* Start geocoding */
      Geolocation geolocation = DEFAULT_NO_LOCATION_VALUE;
      String currentLocation = "";
      Object currentLocationObject = originalTable.get(originalRowNumber, locationColumnNumber);
      if (currentLocationObject != null) {
        currentLocation = currentLocationObject.toString();
        String currentLocationUppercase = currentLocation.toUpperCase();

        /* Avoid re-geocoding the same place */
        if (geocodedAddressToGeoLocation.containsKey(currentLocationUppercase)) {
          geolocation = geocodedAddressToGeoLocation.get(currentLocationUppercase);
          /* A cached default marks an earlier failed lookup; count this occurrence too. */
          if (geolocation == DEFAULT_NO_LOCATION_VALUE) {
            failedFrequency.add(currentLocation);
          }
        } else {
          try {
            geolocation = geocoder.geocodingFullForm(currentLocationUppercase);
          } catch (GeoCoderException e) {
            try {
              /* Try lookup in the abbreviation */
              geolocation = geocoder.geocodingAbbreviation(currentLocationUppercase);
            } catch (GeoCoderException e1) {
              /* No result is found; geolocation keeps the default value. */
              failedFrequency.add(currentLocation);
            }
          }

          /* Add to geocoded map (failures are cached as the default value). */
          geocodedAddressToGeoLocation.put(currentLocationUppercase, geolocation);
        }
      } else {
        /* A null cell is recorded as a failure under the empty string. */
        failedFrequency.add(currentLocation);
      }

      /*
       * Add the new row to the new table by copying the original row & then filling the 2 new
       * columns. The index returned by addRow() is used for every write to the output table so
       * the result stays correct even when the original table's row numbers are not contiguous
       * from 0 (the source row number is not a valid output index in that case).
       * NOTE(review): copyTableRow is assumed to take (newRow, originalRow, newTable,
       * originalTable), matching the table argument order -- confirm.
       */
      int outputRowNumber = outputTable.addRow();
      TableUtilities.copyTableRow(outputRowNumber, originalRowNumber, outputTable, originalTable);
      outputTable.set(outputRowNumber, latitudeColumnNumber, geolocation.getLatitude());
      outputTable.set(outputRowNumber, longitudeColumnNumber, geolocation.getLongitude());
    }

    /* Warning user about failure */
    if (!failedFrequency.isEmpty()) {
      printWarningMessage(logger, locationColumnName, failedFrequency);
    }

    /* Show statistic information */
    int totalRow = originalTable.getRowCount();
    NumberFormat numberFormat = NumberFormat.getInstance();
    logger.log(
        LogService.LOG_INFO,
        String.format(
            "Successfully geocoded %s out of %s locations to geographic coordinates",
            numberFormat.format(totalRow - failedFrequency.sum()), numberFormat.format(totalRow)));
    return outputTable;
  }