Esempio n. 1
0
  /**
   * Dumps the gdata feed to CSV.
   *
   * <p>One row is written per feed entry; no header row is written. The set of columns is taken
   * from the first entry of the feed: its dimensions first, followed by its metrics. For every
   * entry the row is assembled as follows:
   *
   * <ol>
   *   <li>the value of each column is read from the entry; the {@code GaConnector.GA_DATE} column
   *       is re-formatted from {@code IN_FMT} to {@code OUT_FMT}, or replaced by an empty string
   *       when the value is missing, is not 8 characters long, equals {@code UNKNOWN_DATE} or
   *       cannot be parsed,
   *   <li>the profile ID of the query is inserted at index 0,
   *   <li>if the schema reports a non-negative identity column index, the MD5 hex digest of all
   *       dimension values (each followed by {@code "|"}) plus the profile ID is inserted at that
   *       index,
   *   <li>if {@code extendDates} is set, the row is passed through {@code dateExt.extendRow}.
   * </ol>
   *
   * @param schema input schema
   * @param cw CSVWriter
   * @param feed Google feed
   * @param gaq Google Analytics Query
   * @param dateExt date columns extender; only used when {@code extendDates} is {@code true}
   * @param extendDates add the date facts?
   * @return the number of feed entries written, {@code 0} if the feed has no entries
   * @throws IOException in case of an IO problem
   * @throws InvalidParameterException if the query carries a null or empty profile ID
   */
  public static int dump(
      SourceSchema schema,
      CSVWriter cw,
      DataFeed feed,
      GaQuery gaq,
      DateColumnsExtender dateExt,
      boolean extendDates)
      throws IOException {
    l.debug("Dumping GA feed.");
    final String profileId = gaq.getIds();
    if (profileId == null || profileId.length() == 0) {
      throw new InvalidParameterException("Empty Google Analytics profile ID in query.");
    }
    final List<DataEntry> entries = feed.getEntries();

    // Is there an IDENTITY connection point? A negative index means there is none.
    final int identityColumn = schema.getIdentityColumn();
    final boolean hasIdentity = identityColumn >= 0;

    if (entries.isEmpty()) {
      return 0;
    }

    // The first entry defines the columns for the whole feed: dimensions first, then metrics.
    final DataEntry firstEntry = entries.get(0);
    final List<Dimension> dimensions = firstEntry.getDimensions();
    final List<Metric> metrics = firstEntry.getMetrics();

    final List<String> dimensionNames = new ArrayList<String>();
    final List<String> headers = new ArrayList<String>();
    for (Dimension dimension : dimensions) {
      final String name = dimension.getName();
      headers.add(name);
      dimensionNames.add(name);
    }
    for (Metric metric : metrics) {
      headers.add(metric.getName());
    }

    // Per-column facts do not change between entries, so resolve them once up front instead of
    // repeating the list lookup and the name comparison for every single cell.
    final int columnCount = headers.size();
    final boolean[] keyColumn = new boolean[columnCount];
    final boolean[] dateColumn = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      final String name = headers.get(i);
      // The identity key is only needed when there is an identity column to put it in.
      keyColumn[i] = hasIdentity && dimensionNames.contains(name);
      dateColumn[i] = GaConnector.GA_DATE.equalsIgnoreCase(name);
    }

    final DateTimeFormatter inFmt = DateTimeFormat.forPattern(IN_FMT);
    final DateTimeFormatter outFmt = DateTimeFormat.forPattern(OUT_FMT);
    for (DataEntry entry : entries) {
      // Room for all columns plus the profile ID and the optional identity hash.
      final List<String> row = new ArrayList<String>(columnCount + 2);
      final StringBuilder key = new StringBuilder();
      for (int i = 0; i < columnCount; i++) {
        final String valueIn = entry.stringValueOf(headers.get(i));
        if (keyColumn[i]) {
          // A null value contributes the text "null", exactly like string concatenation would.
          key.append(valueIn).append('|');
        }
        String valueOut;
        if (!dateColumn[i]) {
          valueOut = valueIn;
        } else if (valueIn == null || valueIn.length() != 8 || UNKNOWN_DATE.equals(valueIn)) {
          valueOut = "";
          l.debug("Invalid date value '" + valueIn + "'");
        } else {
          try {
            valueOut = outFmt.print(inFmt.parseDateTime(valueIn));
          } catch (IllegalArgumentException e) {
            // Deliberately best-effort: an unparseable date becomes an empty cell.
            valueOut = "";
            l.debug("Invalid date value '" + valueIn + "'");
          }
        }
        row.add(valueOut);
      }

      // The profile ID goes first; the identity hash is inserted afterwards, so its index refers
      // to the row that already contains the profile ID.
      row.add(0, profileId);
      if (hasIdentity) {
        key.append(profileId);
        row.add(identityColumn, DigestUtils.md5Hex(key.toString()));
      }

      String[] r = row.toArray(new String[0]);
      // add the extra date columns
      if (extendDates) {
        r = dateExt.extendRow(r);
      }

      cw.writeNext(r);
    }
    l.debug("Dumped " + entries.size() + " rows from GA feed.");
    return entries.size();
  }