/**
 * Creates the extender for the given schema.
 *
 * Remembers the schema's identity column and date columns and, for every date
 * column, records its column index together with a formatter built from the
 * column's effective pattern.
 *
 * @param schema source schema that supplies the identity column, the date
 *               columns and their indexes
 */
public DateColumnsExtender(SourceSchema schema) {
    dateColumnIndexes = new ArrayList<Integer>();
    dateColumnFormats = new ArrayList<DateTimeFormatter>();
    identityColumn = schema.getIdentityColumn();
    dates = schema.getDates();
    for (SourceColumn dateColumn : dates) {
        dateColumnIndexes.add(schema.getColumnIndex(dateColumn));
        dateColumnFormats.add(DateTimeFormat.forPattern(effectivePatternFor(dateColumn)));
    }
}

/**
 * Resolves the pattern used to build the formatter of a date column.
 *
 * @param column date column
 * @return the column's own format; the default date/datetime pattern when the
 *         format is missing or empty; the default datetime pattern when the
 *         format is the UNIX date format
 */
private static String effectivePatternFor(SourceColumn column) {
    String pattern = column.getFormat();
    if (pattern == null || pattern.isEmpty()) {
        pattern = column.isDatetime()
                ? Constants.DEFAULT_DATETIME_FMT_STRING
                : Constants.DEFAULT_DATE_FMT_STRING;
    }
    // in case of UNIX TIME we don't format but create the date from the UNIX time number
    if (Constants.UNIX_DATE_FORMAT.equalsIgnoreCase(pattern)) {
        pattern = Constants.DEFAULT_DATETIME_FMT_STRING;
    }
    return pattern;
}
/** * Dupmps the gdata feed to CSV * * @param schema input schema * @param cw CSVWriter * @param feed Google feed * @param gaq Google Analytics Query * @param dateExt date columns extender * @param extendDates add the date facts? * @throws IOException in case of an IO problem */ public static int dump( SourceSchema schema, CSVWriter cw, DataFeed feed, GaQuery gaq, DateColumnsExtender dateExt, boolean extendDates) throws IOException { l.debug("Dumping GA feed."); String profileId = gaq.getIds(); if (profileId == null || profileId.length() <= 0) throw new InvalidParameterException("Empty Google Analytics profile ID in query."); List<DataEntry> entries = feed.getEntries(); List<Dimension> dimensions = null; List<String> dimensionNames = new ArrayList<String>(); List<Metric> metrics = null; // Is there an IDENTITY connection point? int identityColumn = schema.getIdentityColumn(); if (!entries.isEmpty()) { DataEntry singleEntry = entries.get(0); dimensions = singleEntry.getDimensions(); metrics = singleEntry.getMetrics(); } else return 0; final List<String> headers = new ArrayList<String>(); for (Dimension dimension : dimensions) { headers.add(dimension.getName()); dimensionNames.add(dimension.getName()); } for (Metric metric : metrics) { headers.add(metric.getName()); } final DateTimeFormatter inFmt = DateTimeFormat.forPattern(IN_FMT); final DateTimeFormatter outFmt = DateTimeFormat.forPattern(OUT_FMT); for (DataEntry entry : entries) { final List<String> row = new ArrayList<String>(); String key = ""; for (String dataName : headers) { final String valueIn = entry.stringValueOf(dataName); if (dimensionNames.contains(dataName)) key += valueIn + "|"; String valueOut; if (GaConnector.GA_DATE.equalsIgnoreCase(dataName)) { if (valueIn == null || valueIn.length() != 8 || UNKNOWN_DATE.equals(valueIn)) { valueOut = ""; l.debug("Invalid date value '" + valueIn + "'"); } else { try { DateTime dt = inFmt.parseDateTime(valueIn); valueOut = outFmt.print(dt); } catch 
(IllegalArgumentException e) { valueOut = ""; l.debug("Invalid date value '" + valueIn + "'"); } } } else { valueOut = valueIn; } row.add(valueOut); } key += profileId; row.add(0, profileId); String hex = DigestUtils.md5Hex(key); if (identityColumn >= 0) { row.add(identityColumn, hex); } String[] r = row.toArray(new String[] {}); // add the extra date columns if (extendDates) r = dateExt.extendRow(r); cw.writeNext(r); } l.debug("Dumped " + entries.size() + " rows from GA feed."); return entries.size(); }