// scan has been done, create FmrcInv
private FmrcInv makeFmrcInv(Formatter debug) throws IOException {
  try {
    Map<CalendarDate, FmrInv> fmrMap = new HashMap<CalendarDate, FmrInv>(); // all files are grouped by run date in an FmrInv
    List<FmrInv> fmrList = new ArrayList<FmrInv>(); // an fmrc is a collection of fmr

    // get the inventory, sorted by path
    for (MFile f : manager.getFiles()) {
      if (logger.isDebugEnabled()) logger.debug("Fmrc: " + config.spec + ": file=" + f.getPath());

      GridDatasetInv inv = null;
      try {
        inv = GridDatasetInv.open(manager, f, config.innerNcml); // inventory is discovered for each GDS
      } catch (IOException ioe) {
        logger.warn("Error opening " + f.getPath() + " (skipped)", ioe);
        continue; // skip
      }

      CalendarDate runDate = inv.getRunDate();
      if (debug != null) debug.format("  opened %s rundate = %s%n", f.getPath(), inv.getRunDateString());

      // add to the fmr for that rundate, creating it on first use
      FmrInv fmr = fmrMap.get(runDate);
      if (fmr == null) {
        fmr = new FmrInv(runDate);
        fmrMap.put(runDate, fmr);
        fmrList.add(fmr);
      }
      fmr.addDataset(inv, debug);
    }
    if (debug != null) debug.format("%n");

    // finish the FmrInv
    Collections.sort(fmrList);
    for (FmrInv fmr : fmrList) {
      fmr.finish();
      if (logger.isDebugEnabled())
        logger.debug("Fmrc: spec=" + config.spec + ": fmr rundate=" + fmr.getRunDate()
            + " nfiles= " + fmr.getFiles().size());
    }

    return new FmrcInv("fmrc:" + manager.getCollectionName(), fmrList, config.fmrcConfig.regularize);

  } catch (Throwable t) {
    logger.error("makeFmrcInv", t);
    throw new RuntimeException(t);
  }
}
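// Below is a minimal, self-contained sketch of the group-by-run-date idiom used in
// makeFmrcInv(): String run dates and the hypothetical RunFiles class stand in for
// CalendarDate and FmrInv; this mimics the pattern, it is not the FmrcInv API itself.
import java.util.*;

public class GroupByRunDate {
  static class RunFiles {
    final String runDate;
    final List<String> files = new ArrayList<>();
    RunFiles(String runDate) { this.runDate = runDate; }
  }

  public static void main(String[] args) {
    Map<String, RunFiles> byRun = new HashMap<>();
    List<RunFiles> runs = new ArrayList<>(); // kept alongside the map so we can sort later
    String[][] inventory = { {"2012-01-01T00", "a.grib2"}, {"2012-01-01T06", "b.grib2"},
                             {"2012-01-01T00", "c.grib2"} };
    for (String[] inv : inventory) {
      RunFiles run = byRun.get(inv[0]);
      if (run == null) { // same lazy-create pattern as makeFmrcInv()
        run = new RunFiles(inv[0]);
        byRun.put(inv[0], run);
        runs.add(run);
      }
      run.files.add(inv[1]);
    }
    runs.sort(Comparator.comparing(r -> r.runDate)); // plays the role of Collections.sort(fmrList)
    for (RunFiles r : runs) System.out.println(r.runDate + " nfiles=" + r.files.size());
  }
}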
public boolean accept(MFile dataset) {
  if (dataset == null) return false;

  // If no Selectors, accept all datasets.
  if (this.selectorGroup.isEmpty()) return true;

  if (dataset.isDirectory()) {
    // If no collection selectors, accept all collection datasets.
    if (!this.containsCollectionIncluders && !this.containsCollectionExcluders) return true;
  } else {
    // If no atomic selectors, accept all atomic datasets.
    if (!this.containsAtomicIncluders && !this.containsAtomicExcluders) return true;
  }

  boolean include = false;
  boolean exclude = false;
  for (Selector curSelector : this.selectorGroup) {
    if (curSelector.isApplicable(dataset) && curSelector.match(dataset)) {
      if (curSelector.isIncluder()) include = true;
      else exclude = true;
    }
  }

  // Deal with atomic datasets
  if (!dataset.isDirectory()) {
    // If have only inclusion Selectors, accept any dataset that is explicitly included.
    if (this.containsAtomicIncluders && !this.containsAtomicExcluders) return include;
    // If have only exclusion Selectors, accept any dataset not explicitly excluded.
    if (this.containsAtomicExcluders && !this.containsAtomicIncluders) return !exclude;
    // If have both inclusion and exclusion Selectors, accept datasets that are
    // explicitly included but not explicitly excluded.
    if (this.containsAtomicIncluders && this.containsAtomicExcluders && include) return !exclude;

  // Deal with collection datasets
  } else {
    // If have only inclusion Selectors, accept any dataset that is explicitly included.
    if (this.containsCollectionIncluders && !this.containsCollectionExcluders) return include;
    // If have only exclusion Selectors, accept any dataset not explicitly excluded.
    if (this.containsCollectionExcluders && !this.containsCollectionIncluders) return !exclude;
    // If have both inclusion and exclusion Selectors, accept datasets that are
    // explicitly included but not explicitly excluded.
    if (this.containsCollectionIncluders && this.containsCollectionExcluders && include) return !exclude;
  }

  // Otherwise, don't accept.
  return false;
}
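// A minimal, runnable sketch of the includer/excluder resolution above, using filename
// regexes in place of Selector objects; the class, method, and patterns here are
// hypothetical illustrations of the semantics, not the real Selector API.
import java.util.*;
import java.util.regex.Pattern;

public class IncludeExcludeDemo {
  static boolean accept(String name, List<Pattern> includers, List<Pattern> excluders) {
    if (includers.isEmpty() && excluders.isEmpty()) return true; // no selectors: accept all
    boolean include = includers.stream().anyMatch(p -> p.matcher(name).matches());
    boolean exclude = excluders.stream().anyMatch(p -> p.matcher(name).matches());
    if (!includers.isEmpty() && excluders.isEmpty()) return include;  // only includers
    if (includers.isEmpty()) return !exclude;                         // only excluders
    return include && !exclude;                                       // both kinds present
  }

  public static void main(String[] args) {
    List<Pattern> inc = List.of(Pattern.compile(".*\\.grib2"));
    List<Pattern> exc = List.of(Pattern.compile(".*tmp.*"));
    System.out.println(accept("run.grib2", inc, exc));     // true: included, not excluded
    System.out.println(accept("run.tmp.grib2", inc, exc)); // false: exclusion wins
  }
}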
/**
 * Accept datasets whose last modified date is at least lastModifiedLimitInMillis
 * milliseconds in the past.
 *
 * @param dataset the dataset to filter
 * @return true if the dataset's last modified date is at least lastModifiedLimitInMillis in the past.
 */
public boolean accept(MFile dataset) {
  long lastModDate = dataset.getLastModified();
  if (lastModDate > 0) {
    long now = System.currentTimeMillis();
    if (now - lastModDate > lastModifiedLimitInMillis) return true;
  }
  return false;
}
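// A tiny sketch of the age check above, assuming a hypothetical 60-second limit. The
// point of the filter is to skip files that may still be written to: a file only
// passes once it has been quiet for the configured interval.
public class AgeFilterDemo {
  public static void main(String[] args) {
    long lastModifiedLimitInMillis = 60_000;                   // hypothetical limit
    long lastModDate = System.currentTimeMillis() - 120_000;   // pretend the file is 2 minutes old
    long now = System.currentTimeMillis();
    boolean accept = lastModDate > 0 && (now - lastModDate > lastModifiedLimitInMillis);
    System.out.println("accept=" + accept); // true: the file has been quiet long enough
  }
}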
// single file
private Grib2CollectionBuilder(MFile file, FeatureCollectionConfig.GribConfig config, Formatter f)
    throws IOException {
  this.isSingleFile = true;
  try {
    // String spec = StringUtil2.substitute(file.getPath(), "\\", "/");
    CollectionManager dcm = new CollectionManagerSingleFile(file);
    if (config != null) dcm.putAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG, config);
    this.collections.add(dcm);
    this.gc = new Grib2Collection(file.getName(), new File(dcm.getRoot()), config);

  } catch (Exception e) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(10000);
    e.printStackTrace(new PrintStream(bos));
    f.format("%s", bos.toString());
    throw new IOException(e);
  }
}
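// A self-contained sketch of the stack-trace capture idiom used in the catch block
// above: render a Throwable's stack trace into a Formatter-backed report instead of
// printing it to stderr. The exception and message are hypothetical.
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.Formatter;

public class TraceToFormatter {
  public static void main(String[] args) {
    Formatter f = new Formatter();
    try {
      throw new IllegalStateException("example failure");
    } catch (Exception e) {
      ByteArrayOutputStream bos = new ByteArrayOutputStream(10000);
      e.printStackTrace(new PrintStream(bos)); // trace goes into the buffer, not stderr
      f.format("%s", bos.toString());
    }
    System.out.println(f); // the formatter now carries the full trace
  }
}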
// read all records in all files,
// divide into groups based on GDS hash
// each group has an arraylist of all records that belong to it.
// for each group, run the rectilyser to derive the coordinates and variables
public List<Group> makeAggregatedGroups(
    ArrayList<String> filenames, CollectionManager.Force force, Formatter f) throws IOException {
  Map<Integer, Group> gdsMap = new HashMap<Integer, Group>();

  f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName());
  int total = 0;
  int fileno = 0;

  for (CollectionManager dcm : collections) {
    // dcm.scanIfNeeded(); // LOOK ??
    f.format(" dcm= %s%n", dcm);
    Map<Integer, Integer> gdsConvert = (Map<Integer, Integer>) dcm.getAuxInfo("gdsHash");

    for (MFile mfile : dcm.getFiles()) {
      // f.format("%3d: %s%n", fileno, mfile.getPath());
      filenames.add(mfile.getPath());

      Grib2Index index = new Grib2Index();
      try {
        // here is where the index date is checked against the data file
        if (!index.readIndex(mfile.getPath(), mfile.getLastModified(), force)) {
          index.makeIndex(mfile.getPath(), f);
          f.format("  Index written: %s == %d records %n",
              mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
        } else if (debug) {
          f.format("  Index read: %s == %d records %n",
              mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
        }
      } catch (IOException ioe) {
        f.format("GribCollectionBuilder: reading/creating gbx9 index failed err=%s%n skipping %s%n",
            ioe.getMessage(), mfile.getPath() + Grib2Index.IDX_EXT);
        continue;
      }

      for (Grib2Record gr : index.getRecords()) {
        gr.setFile(fileno); // each record tracks which file it belongs to
        int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records
        if (gdsConvert != null && gdsConvert.get(gdsHash) != null) {
          // allow external config to override gdsHash: needed to work around errors in the
          // GDS encoding, since grouping requires exact hash matching
          gdsHash = gdsConvert.get(gdsHash);
        }
        Group g = gdsMap.get(gdsHash);
        if (g == null) {
          g = new Group(gr.getGDSsection(), gdsHash);
          gdsMap.put(gdsHash, g);
        }
        g.records.add(gr);
        total++;
      }
      fileno++;
    }
  }
  f.format(" total grib records= %d%n", total);

  Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter();
  List<Group> result = new ArrayList<Group>(gdsMap.values());
  for (Group g : result) {
    g.rect = new Grib2Rectilyser(g.records, g.gdsHash);
    f.format(" GDS hash %d == ", g.gdsHash);
    g.rect.make(f, c);
  }
  f.format(" Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n",
      c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records);

  return result;
}
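// A minimal sketch of the GDS-hash grouping with an external override map, mirroring
// the gdsConvert logic in makeAggregatedGroups(). The record names and hash values are
// hypothetical; the point is that a config-supplied remap lets two grids that should be
// identical (but hash differently due to an encoding error) land in the same group.
import java.util.*;

public class GdsHashGrouping {
  public static void main(String[] args) {
    Map<String, Integer> recordToHash = new LinkedHashMap<>();
    recordToHash.put("rec1", 100);
    recordToHash.put("rec2", 101); // same grid as rec1, but hashes differently
    recordToHash.put("rec3", 200);

    Map<Integer, Integer> gdsConvert = Map.of(101, 100); // external config: treat 101 as 100

    Map<Integer, List<String>> groups = new HashMap<>();
    for (Map.Entry<String, Integer> e : recordToHash.entrySet()) {
      int gdsHash = e.getValue();
      Integer remap = gdsConvert.get(gdsHash);
      if (remap != null) gdsHash = remap; // exact hash matching requires the override
      groups.computeIfAbsent(gdsHash, h -> new ArrayList<>()).add(e.getKey());
    }
    System.out.println(groups); // e.g. {100=[rec1, rec2], 200=[rec3]}
  }
}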
public Date getDate(MFile mfile) {
  if (useName)
    return DateFromString.getDateUsingDemarkatedCount(mfile.getName(), dateFormatMark, '#');
  else
    return DateFromString.getDateUsingDemarkatedMatch(mfile.getPath(), dateFormatMark, '#');
}
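// A self-contained sketch of the demarcated date-format-mark idea: the text after the
// '#' is a SimpleDateFormat pattern, and the characters before it locate where the date
// starts in the filename. This mimics the concept behind
// DateFromString.getDateUsingDemarkatedCount(); the mark and filename are hypothetical.
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class DateMarkDemo {
  public static void main(String[] args) throws ParseException {
    String dateFormatMark = "gfs_#yyyyMMdd_HH";        // hypothetical mark
    String filename = "gfs_20120229_06.grib2";
    int hash = dateFormatMark.indexOf('#');
    String prefix = dateFormatMark.substring(0, hash);   // "gfs_" fixes the offset
    String pattern = dateFormatMark.substring(hash + 1); // "yyyyMMdd_HH"
    String dateText = filename.substring(prefix.length(), prefix.length() + pattern.length());
    Date date = new SimpleDateFormat(pattern).parse(dateText);
    System.out.println(date); // the run date parsed out of the filename
  }
}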
/**
 * Test if this selector applies to the given dataset.
 *
 * @param dataset the MFile to test if this selector applies.
 * @return true if this selector applies to the given dataset, false otherwise.
 */
public boolean isApplicable(MFile dataset) {
  if (this.applyToAtomicDataset && !dataset.isDirectory()) return true;
  if (this.applyToCollectionDataset && dataset.isDirectory()) return true;
  return false;
}
public boolean accept(MFile dataset) {
  java.util.regex.Matcher matcher = this.pattern.matcher(dataset.getName());
  return matcher.matches();
}
public boolean accept(MFile mfile) {
  java.util.regex.Matcher matcher = this.pattern.matcher(mfile.getName());
  return matcher.matches();
}
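// A small runnable sketch of the regex name filter used by the two accept() methods
// above; the pattern and filenames are hypothetical. Note that matches() requires the
// whole name to match, unlike find(), so the pattern implicitly anchors at both ends.
import java.util.regex.Pattern;

public class RegexFilterDemo {
  public static void main(String[] args) {
    Pattern pattern = Pattern.compile(".*\\.grib2");
    System.out.println(pattern.matcher("gfs_20120229_06.grib2").matches());      // true
    System.out.println(pattern.matcher("gfs_20120229_06.grib2.gbx9").matches()); // false
  }
}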
// read all records in all files,
// divide into groups based on GDS hash
// each group has an arraylist of all records that belong to it.
// for each group, run the rectilyser to derive the coordinates and variables
public List<Group> makeAggregatedGroups(
    List<String> filenames, CollectionManager.Force force, Formatter f) throws IOException {
  Map<Integer, Group> gdsMap = new HashMap<Integer, Group>();
  boolean intvMerge = mergeIntvDefault;

  f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName());
  int total = 0;
  int fileno = 0;

  for (CollectionManager dcm : collections) {
    f.format(" dcm= %s%n", dcm);
    FeatureCollectionConfig.GribConfig config =
        (FeatureCollectionConfig.GribConfig) dcm.getAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG);
    Map<Integer, Integer> gdsConvert = (config != null) ? config.gdsHash : null;
    FeatureCollectionConfig.GribIntvFilter intvMap = (config != null) ? config.intvFilter : null;
    intvMerge = (config == null || config.intvMerge == null) ? mergeIntvDefault : config.intvMerge;

    for (MFile mfile : dcm.getFiles()) {
      // f.format("%3d: %s%n", fileno, mfile.getPath());
      filenames.add(mfile.getPath());

      Grib2Index index = null;
      try {
        index = (Grib2Index) GribIndex.readOrCreateIndexFromSingleFile(
            false, !isSingleFile, mfile, config, force, f);
      } catch (IOException ioe) {
        logger.warn("GribCollectionBuilder {}: reading/creating gbx9 index failed err={}",
            gc.getName(), ioe.getMessage());
        f.format("GribCollectionBuilder: reading/creating gbx9 index failed err=%s%n skipping %s%n",
            ioe.getMessage(), mfile.getPath() + GribIndex.IDX_EXT);
        continue;
      }

      for (Grib2Record gr : index.getRecords()) {
        if (this.tables == null) {
          Grib2SectionIdentification ids = gr.getId(); // so all records must use the same table (!)
          this.tables = Grib2Customizer.factory(ids.getCenter_id(), ids.getSubcenter_id(),
              ids.getMaster_table_version(), ids.getLocal_table_version());
          if (config != null)
            tables.setTimeUnitConverter(config.getTimeUnitConverter()); // LOOK doesn't really work with multiple collections
        }
        if (intvMap != null && filterTinv(gr, intvMap, f)) continue; // skip

        gr.setFile(fileno); // each record tracks which file it belongs to
        int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records
        if (gdsConvert != null && gdsConvert.get(gdsHash) != null) {
          // allow external config to override gdsHash: needed to work around errors in the
          // GDS encoding, since grouping requires exact hash matching
          gdsHash = gdsConvert.get(gdsHash);
        }

        Group g = gdsMap.get(gdsHash);
        if (g == null) {
          g = new Group(gr.getGDSsection(), gdsHash);
          gdsMap.put(gdsHash, g);
        }
        g.records.add(gr);
        total++;
      }
      fileno++;
    }
  }
  f.format(" total grib records= %d%n", total);

  Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter(); // debugging
  List<Group> result = new ArrayList<Group>(gdsMap.values());
  for (Group g : result) {
    g.rect = new Grib2Rectilyser(tables, g.records, g.gdsHash, intvMerge);
    f.format(" GDS hash %d == ", g.gdsHash);
    g.rect.make(f, c, filenames);
  }
  f.format(" Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n",
      c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records);

  return result;
}
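// A tiny sketch of the lazy "first record wins" initialization used for the GRIB
// tables above: the first record seen fixes the shared state, and all later records
// silently reuse it, which is exactly the caveat flagged by the "(!)" comment. The
// record strings here are hypothetical stand-ins for Grib2SectionIdentification.
import java.util.List;

public class LazyFirstWins {
  static String tables = null; // stands in for the shared Grib2Customizer

  public static void main(String[] args) {
    for (String record : List.of("center=7", "center=7", "center=98")) {
      if (tables == null) tables = record; // only the first record is consulted
    }
    System.out.println("tables chosen from first record: " + tables);
    // "center=98" was never used: mixing files with different tables goes unnoticed
  }
}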