// read or create index private void readOrCreateIndex(CollectionManager.Force ff, Formatter f) throws IOException { // force new index or test for new index needed boolean force = ((ff == CollectionManager.Force.always) || (ff == CollectionManager.Force.test && needsUpdate())); // otherwise, we're good as long as the index file exists File idx = gc.getIndexFile(); if (force || !idx.exists() || !readIndex(idx.getPath())) { logger.info("GribCollection {}: createIndex {}", gc.getName(), idx.getPath()); createIndex(idx, ff, f); // write out index gc.rafLocation = idx.getPath(); gc.setRaf(new RandomAccessFile(idx.getPath(), "r")); readIndex(gc.getRaf()); // read back in index } }
// LOOK what about extending an index ?? public boolean makeIndex(String filename, RandomAccessFile dataRaf, Formatter f) throws IOException { File idxFile = GribCollection.getIndexFile(filename + GBX9_IDX); FileOutputStream fout = new FileOutputStream(idxFile); RandomAccessFile raf = null; try { //// header message fout.write(MAGIC_START.getBytes("UTF-8")); NcStream.writeVInt(fout, version); Map<Long, Integer> gdsMap = new HashMap<Long, Integer>(); gdsList = new ArrayList<Grib1SectionGridDefinition>(); records = new ArrayList<Grib1Record>(200); Grib1IndexProto.Grib1Index.Builder rootBuilder = Grib1IndexProto.Grib1Index.newBuilder(); rootBuilder.setFilename(filename); if (dataRaf == null) { raf = new RandomAccessFile(filename, "r"); dataRaf = raf; } Grib1RecordScanner scan = new Grib1RecordScanner(dataRaf); while (scan.hasNext()) { Grib1Record r = scan.next(); if (r == null) break; // done records.add(r); Grib1SectionGridDefinition gds = r.getGDSsection(); Integer index = gdsMap.get(gds.calcCRC()); if (gds.getPredefinedGridDefinition() >= 0) // skip predefined gds - they dont have raw bytes index = 0; else if (index == null) { gdsList.add(gds); index = gdsList.size() - 1; gdsMap.put(gds.calcCRC(), index); rootBuilder.addGdsList(makeGdsProto(gds)); } rootBuilder.addRecords(makeRecordProto(r, index)); } ucar.nc2.grib.grib1.Grib1IndexProto.Grib1Index index = rootBuilder.build(); byte[] b = index.toByteArray(); NcStream.writeVInt(fout, b.length); // message size fout.write(b); // message - all in one gulp f.format(" made gbx9 index for %s size=%d%n", filename, b.length); return true; } finally { fout.close(); if (raf != null) raf.close(); } }
private boolean needsUpdate(long collectionLastModified, Formatter f) throws IOException { CollectionManager.ChangeChecker cc = Grib1Index.getChangeChecker(); for (CollectionManager dcm : tpc.makePartitions()) { // LOOK not really right, since we dont know if these files are the // same as in the index File idxFile = GribCollection.getIndexFile(dcm); if (!idxFile.exists()) return true; if (collectionLastModified < idxFile.lastModified()) return true; for (MFile mfile : dcm.getFiles()) { if (cc.hasChangedSince(mfile, idxFile.lastModified())) return true; } } return false; }
/**
 * Converts a serialized variable description into a {@code VariableIndex} for the given group.
 *
 * <p>Every field is copied straight from the protobuf message and handed to
 * {@code gc.makeVariableIndex}; no values are derived or validated here.
 *
 * @param pv the protobuf variable read from the index
 * @param group the group the new variable index belongs to
 * @return the variable index built by the grib collection
 */
protected GribCollection.VariableIndex readVariable(
    GribCollectionProto.Variable pv, GribCollection.GroupHcs group) {
  // arguments are listed in the order makeVariableIndex expects them
  return gc.makeVariableIndex(
      group,
      pv.getTableVersion(),
      pv.getDiscipline(),
      pv.getCategory(),
      pv.getParameter(),
      pv.getLevelType(),
      pv.getIsLayer(),
      pv.getIntervalType(),
      pv.getIntvName(),
      pv.getEnsDerivedType(),
      pv.getProbabilityType(),
      pv.getProbabilityName(),
      pv.getCdmHash(),
      pv.getTimeIdx(),
      pv.getVertIdx(),
      pv.getEnsIdx(),
      pv.getRecordsPos(),
      pv.getRecordsLen());
}
/* MAGIC_START version sizeRecords VariableRecords (sizeRecords bytes) sizeIndex GribCollectionIndex (sizeIndex bytes) */ private boolean writeIndex(TimePartition.Partition canon, Formatter f) throws IOException { File file = tp.getIndexFile(); if (file.exists()) { if (!file.delete()) logger.error("Cant delete " + file.getPath()); } RandomAccessFile raf = new RandomAccessFile(file.getPath(), "rw"); raf.order(RandomAccessFile.BIG_ENDIAN); try { //// header message raf.write(MAGIC_START.getBytes("UTF-8")); raf.writeInt(version); raf.writeLong(0); // no record section GribCollectionProto.GribCollectionIndex.Builder indexBuilder = GribCollectionProto.GribCollectionIndex.newBuilder(); indexBuilder.setName(tp.getName()); GribCollection canonGc = canon.makeGribCollection(f); for (GribCollection.GroupHcs g : canonGc.getGroups()) indexBuilder.addGroups(writeGroupProto(g)); indexBuilder.setCenter(canonGc.getCenter()); indexBuilder.setSubcenter(canonGc.getSubcenter()); indexBuilder.setMaster(canonGc.getMaster()); indexBuilder.setLocal(canonGc.getLocal()); for (TimePartition.Partition p : tp.getPartitions()) { indexBuilder.addPartitions(writePartitionProto(p.getName(), (TimePartition.Partition) p)); } GribCollectionProto.GribCollectionIndex index = indexBuilder.build(); byte[] b = index.toByteArray(); NcStream.writeVInt(raf, b.length); // message size raf.write(b); // message - all in one gulp f.format("GribCollectionTimePartitionedIndex= %d bytes%n", b.length); } finally { f.format("file size = %d bytes%n", raf.length()); raf.close(); } return true; }
// consistency check on variables : compare each variable to corresponding one in proto // also set the groupno and partno for each partition private boolean checkPartitions(TimePartition.Partition canon, Formatter f) throws IOException { List<TimePartition.Partition> partitions = tp.getPartitions(); int npart = partitions.size(); boolean ok = true; // for each group in canonical Partition GribCollection canonGc = canon.makeGribCollection(f); for (GribCollection.GroupHcs firstGroup : canonGc.getGroups()) { String gname = firstGroup.getId(); if (trace) f.format(" Check Group %s%n", gname); // hash proto variables for quick lookup Map<Integer, GribCollection.VariableIndex> check = new HashMap<Integer, GribCollection.VariableIndex>(firstGroup.varIndex.size()); List<GribCollection.VariableIndex> varIndexP = new ArrayList<GribCollection.VariableIndex>(firstGroup.varIndex.size()); for (GribCollection.VariableIndex vi : firstGroup.varIndex) { TimePartition.VariableIndexPartitioned vip = tp.makeVariableIndexPartitioned(vi, npart); varIndexP.add(vip); check.put(vi.cdmHash, vip); // replace with its evil twin } firstGroup.varIndex = varIndexP; // replace with its evil twin // for each partition for (int partno = 0; partno < npart; partno++) { TimePartition.Partition tpp = partitions.get(partno); if (trace) f.format(" Check Partition %s%n", tpp.getName()); // get corresponding group GribCollection gc = tpp.makeGribCollection(f); int groupIdx = gc.findGroupIdxById(firstGroup.getId()); if (groupIdx < 0) { f.format(" Cant find group %s in partition %s%n", gname, tpp.getName()); ok = false; continue; } GribCollection.GroupHcs group = gc.getGroup(groupIdx); // for each variable in partition group for (int varIdx = 0; varIdx < group.varIndex.size(); varIdx++) { GribCollection.VariableIndex vi2 = group.varIndex.get(varIdx); if (trace) f.format(" Check variable %s%n", vi2); int flag = 0; GribCollection.VariableIndex vi1 = check.get(vi2.cdmHash); // compare with proto variable if (vi1 
== null) { f.format( " WARN Cant find variable %s from %s in proto - ignoring that variable%n", vi2, tpp.getName()); continue; // we can tolerate this } // compare vert coordinates VertCoord vc1 = vi1.getVertCoord(); VertCoord vc2 = vi2.getVertCoord(); if ((vc1 == null) != (vc2 == null)) { f.format( " ERR Vert coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((vc1 != null) && !vc1.equalsData(vc2)) { f.format( " WARN Vert coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon vc = %s%n", vc1); f.format(" this vc = %s%n", vc2); flag |= TimePartition.VERT_COORDS_DIFFER; } // compare ens coordinates EnsCoord ec1 = vi1.getEnsCoord(); EnsCoord ec2 = vi2.getEnsCoord(); if ((ec1 == null) != (ec2 == null)) { f.format( " ERR Ensemble coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((ec1 != null) && !ec1.equalsData(ec2)) { f.format( " WARN Ensemble coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon ec = %s%n", ec1); f.format(" this ec = %s%n", ec2); flag |= TimePartition.ENS_COORDS_DIFFER; } ((TimePartition.VariableIndexPartitioned) vi1) .setPartitionIndex(partno, groupIdx, varIdx, flag); } // loop over variable } // loop over partition } // loop over group if (ok) f.format(" Partition check: vert, ens coords OK%n"); return ok; }
public boolean readIndex(String filename, long gribLastModified, CollectionManager.Force force) throws IOException { File idxFile = GribCollection.getIndexFile(filename + GBX9_IDX); if (!idxFile.exists()) return false; long idxModified = idxFile.lastModified(); if ((force != CollectionManager.Force.nocheck) && (idxModified < gribLastModified)) return false; // force new index if file was updated FileInputStream fin = new FileInputStream(idxFile); // LOOK need DiskCache for non-writeable directories try { //// check header is ok if (!NcStream.readAndTest(fin, MAGIC_START.getBytes())) { log.info("Bad magic number of grib index, on file" + idxFile); return false; } int v = NcStream.readVInt(fin); if (v != version) { if ((v == 0) || (v > version)) throw new IOException( "Grib1Index found version " + v + ", want version " + version + " on " + filename); if (log.isDebugEnabled()) log.debug( "Grib1Index found version " + v + ", want version " + version + " on " + filename); return false; } int size = NcStream.readVInt(fin); if (size <= 0 || size > 100 * 1000 * 1000) { // try to catch garbage log.warn("Grib1Index bad size = {} for {} ", size, filename); return false; } byte[] m = new byte[size]; NcStream.readFully(fin, m); Grib1IndexProto.Grib1Index proto = Grib1IndexProto.Grib1Index.parseFrom(m); String fname = proto.getFilename(); if (debug) System.out.printf("%s for %s%n", fname, filename); gdsList = new ArrayList<Grib1SectionGridDefinition>(proto.getGdsListCount()); for (Grib1IndexProto.Grib1GdsSection pgds : proto.getGdsListList()) { Grib1SectionGridDefinition gds = readGds(pgds); gdsList.add(gds); } if (debug) System.out.printf(" read %d gds%n", gdsList.size()); records = new ArrayList<Grib1Record>(proto.getRecordsCount()); for (Grib1IndexProto.Grib1Record precord : proto.getRecordsList()) { records.add(readRecord(precord)); } if (debug) System.out.printf(" read %d records%n", records.size()); } catch (java.lang.NegativeArraySizeException e) { log.error("GribIndex 
failed on " + filename, e); return false; } catch (IOException e) { log.error("GribIndex failed on " + filename, e); return false; } finally { fin.close(); } return true; }
private void createIndex( File indexFile, List<Group> groups, ArrayList<String> filenames, Formatter f) throws IOException { Grib2Record first = null; // take global metadata from here if (indexFile.exists()) indexFile.delete(); // replace it f.format(" createIndex for %s%n", indexFile.getPath()); RandomAccessFile raf = new RandomAccessFile(indexFile.getPath(), "rw"); raf.order(RandomAccessFile.BIG_ENDIAN); try { //// header message raf.write(MAGIC_START.getBytes("UTF-8")); raf.writeInt(version); long lenPos = raf.getFilePointer(); raf.writeLong(0); // save space to write the length of the record section long countBytes = 0; int countRecords = 0; for (Group g : groups) { g.fileSet = new HashSet<Integer>(); for (Grib2Rectilyser.VariableBag vb : g.rect.getGribvars()) { if (first == null) first = vb.first; GribCollectionProto.VariableRecords vr = writeRecordsProto(vb, g.fileSet); byte[] b = vr.toByteArray(); vb.pos = raf.getFilePointer(); vb.length = b.length; raf.write(b); countBytes += b.length; countRecords += vb.recordMap.length; } } long bytesPerRecord = countBytes / ((countRecords == 0) ? 1 : countRecords); f.format( " write RecordMaps: bytes = %d record = %d bytesPerRecord=%d%n", countBytes, countRecords, bytesPerRecord); if (first == null) { logger.error("GribCollection {}: has no files\n{}", gc.getName(), f.toString()); throw new IllegalArgumentException("GribCollection " + gc.getName() + " has no files"); } long pos = raf.getFilePointer(); raf.seek(lenPos); raf.writeLong(countBytes); raf.seek(pos); // back to the output. 
GribCollectionProto.GribCollectionIndex.Builder indexBuilder = GribCollectionProto.GribCollectionIndex.newBuilder(); indexBuilder.setName(gc.getName()); for (String fn : filenames) indexBuilder.addFiles(fn); for (Group g : groups) indexBuilder.addGroups(writeGroupProto(g)); /* int count = 0; for (DatasetCollectionManager dcm : collections) { indexBuilder.addParams(makeParamProto(new Parameter("spec" + count, dcm.()))); count++; } */ // what about just storing first ?? Grib2SectionIdentification ids = first.getId(); indexBuilder.setCenter(ids.getCenter_id()); indexBuilder.setSubcenter(ids.getSubcenter_id()); indexBuilder.setMaster(ids.getMaster_table_version()); indexBuilder.setLocal(ids.getLocal_table_version()); Grib2Pds pds = first.getPDS(); indexBuilder.setGenProcessType(pds.getGenProcessType()); indexBuilder.setGenProcessId(pds.getGenProcessId()); indexBuilder.setBackProcessId(pds.getBackProcessId()); GribCollectionProto.GribCollectionIndex index = indexBuilder.build(); byte[] b = index.toByteArray(); NcStream.writeVInt(raf, b.length); // message size raf.write(b); // message - all in one gulp f.format(" write GribCollectionIndex= %d bytes%n", b.length); } finally { f.format(" file size = %d bytes%n", raf.length()); raf.close(); if (raf != null) raf.close(); } }
// read all records in all files, // divide into groups based on GDS hash // each group has an arraylist of all records that belong to it. // for each group, run rectlizer to derive the coordinates and variables public List<Group> makeAggregatedGroups( ArrayList<String> filenames, CollectionManager.Force force, Formatter f) throws IOException { Map<Integer, Group> gdsMap = new HashMap<Integer, Group>(); f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName()); int total = 0; int fileno = 0; for (CollectionManager dcm : collections) { // dcm.scanIfNeeded(); // LOOK ?? f.format(" dcm= %s%n", dcm); Map<Integer, Integer> gdsConvert = (Map<Integer, Integer>) dcm.getAuxInfo("gdsHash"); for (MFile mfile : dcm.getFiles()) { // f.format("%3d: %s%n", fileno, mfile.getPath()); filenames.add(mfile.getPath()); Grib2Index index = new Grib2Index(); try { if (!index.readIndex( mfile.getPath(), mfile.getLastModified(), force)) { // heres where the index date is checked against the data file index.makeIndex(mfile.getPath(), f); f.format( " Index written: %s == %d records %n", mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size()); } else if (debug) { f.format( " Index read: %s == %d records %n", mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size()); } } catch (IOException ioe) { f.format( "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n skipping %s%n", ioe.getMessage(), mfile.getPath() + Grib2Index.IDX_EXT); continue; } for (Grib2Record gr : index.getRecords()) { gr.setFile(fileno); // each record tracks which file it belongs to int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records if (gdsConvert != null && gdsConvert.get(gdsHash) != null) { // allow external config to muck with gdsHash. Why? 
because of error in // encoding gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching } Group g = gdsMap.get(gdsHash); if (g == null) { g = new Group(gr.getGDSsection(), gdsHash); gdsMap.put(gdsHash, g); } g.records.add(gr); total++; } fileno++; } } f.format(" total grib records= %d%n", total); Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter(); List<Group> result = new ArrayList<Group>(gdsMap.values()); for (Group g : result) { g.rect = new Grib2Rectilyser(g.records, g.gdsHash); f.format(" GDS hash %d == ", g.gdsHash); g.rect.make(f, c); } f.format( " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n", c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records); return result; }
public boolean readIndex(RandomAccessFile raf) { gc.setRaf(raf); // LOOK leaving the raf open in the GribCollection try { raf.order(RandomAccessFile.BIG_ENDIAN); raf.seek(0); //// header message if (!NcStream.readAndTest(raf, MAGIC_START.getBytes())) { logger.error("GribCollection {}: invalid index", gc.getName()); return false; } int v = raf.readInt(); if (v != getVersion()) { logger.warn( "GribCollection {}: index found version={}, want version= {} on file {}", new Object[] {gc.getName(), v, version, raf.getLocation()}); return false; } long skip = raf.readLong(); raf.skipBytes(skip); int size = NcStream.readVInt(raf); if ((size < 0) || (size > 100 * 1000 * 1000)) { logger.warn("GribCollection {}: invalid index ", gc.getName()); return false; } byte[] m = new byte[size]; raf.readFully(m); GribCollectionProto.GribCollectionIndex proto = GribCollectionProto.GribCollectionIndex.parseFrom(m); gc.center = proto.getCenter(); gc.subcenter = proto.getSubcenter(); gc.master = proto.getMaster(); gc.local = proto.getLocal(); gc.genProcessType = proto.getGenProcessType(); gc.genProcessId = proto.getGenProcessId(); gc.backProcessId = proto.getBackProcessId(); gc.local = proto.getLocal(); // gc.tables = Grib2Tables.factory(gc.center, gc.subcenter, gc.master, gc.local); gc.filenames = new ArrayList<String>(proto.getFilesCount()); for (int i = 0; i < proto.getFilesCount(); i++) gc.filenames.add(proto.getFiles(i)); // error condition on a GribCollection Index if ((proto.getFilesCount() == 0) && !(this instanceof TimePartitionBuilder)) { logger.warn("GribCollection {}: has no files, force recreate ", gc.getName()); return false; } gc.groups = new ArrayList<GribCollection.GroupHcs>(proto.getGroupsCount()); for (int i = 0; i < proto.getGroupsCount(); i++) gc.groups.add(readGroup(proto.getGroups(i), gc.makeGroup())); Collections.sort(gc.groups); gc.params = new ArrayList<Parameter>(proto.getParamsCount()); for (int i = 0; i < proto.getParamsCount(); i++) 
gc.params.add(readParam(proto.getParams(i))); if (!readPartitions(proto)) { logger.warn("TimePartition {}: has no partitions, force recreate ", gc.getName()); return false; } return true; } catch (Throwable t) { logger.error("Error reading index " + raf.getLocation(), t); return false; } }
/**
 * Reports whether the collection index must be rebuilt.
 *
 * @return true if the index file does not exist, otherwise the result of the staleness check
 *     against the index file's last-modified time
 */
public boolean needsUpdate() {
  File indexFile = gc.getIndexFile();
  if (!indexFile.exists()) {
    return true; // nothing to compare against
  }
  return needsUpdate(indexFile.lastModified());
}
// read all records in all files, // divide into groups based on GDS hash // each group has an arraylist of all records that belong to it. // for each group, run rectlizer to derive the coordinates and variables public List<Group> makeAggregatedGroups( List<String> filenames, CollectionManager.Force force, Formatter f) throws IOException { Map<Integer, Group> gdsMap = new HashMap<Integer, Group>(); boolean intvMerge = mergeIntvDefault; f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName()); int total = 0; int fileno = 0; for (CollectionManager dcm : collections) { f.format(" dcm= %s%n", dcm); FeatureCollectionConfig.GribConfig config = (FeatureCollectionConfig.GribConfig) dcm.getAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG); Map<Integer, Integer> gdsConvert = (config != null) ? config.gdsHash : null; FeatureCollectionConfig.GribIntvFilter intvMap = (config != null) ? config.intvFilter : null; intvMerge = (config == null) || (config.intvMerge == null) ? mergeIntvDefault : config.intvMerge; for (MFile mfile : dcm.getFiles()) { // f.format("%3d: %s%n", fileno, mfile.getPath()); filenames.add(mfile.getPath()); Grib2Index index = null; try { index = (Grib2Index) GribIndex.readOrCreateIndexFromSingleFile( false, !isSingleFile, mfile, config, force, f); } catch (IOException ioe) { logger.warn( "GribCollectionBuilder {}: reading/Creating gbx9 index failed err={}", gc.getName(), ioe.getMessage()); f.format( "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n skipping %s%n", ioe.getMessage(), mfile.getPath() + GribIndex.IDX_EXT); continue; } for (Grib2Record gr : index.getRecords()) { if (this.tables == null) { Grib2SectionIdentification ids = gr.getId(); // so all records must use the same table (!) 
this.tables = Grib2Customizer.factory( ids.getCenter_id(), ids.getSubcenter_id(), ids.getMaster_table_version(), ids.getLocal_table_version()); if (config != null) tables.setTimeUnitConverter( config .getTimeUnitConverter()); // LOOK doesnt really work with multiple collections } if (intvMap != null && filterTinv(gr, intvMap, f)) continue; // skip gr.setFile(fileno); // each record tracks which file it belongs to int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records if (gdsConvert != null && gdsConvert.get(gdsHash) != null) // allow external config to muck with gdsHash. Why? because of error in // encoding gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching Group g = gdsMap.get(gdsHash); if (g == null) { g = new Group(gr.getGDSsection(), gdsHash); gdsMap.put(gdsHash, g); } g.records.add(gr); total++; } fileno++; } } f.format(" total grib records= %d%n", total); Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter(); // debugging List<Group> result = new ArrayList<Group>(gdsMap.values()); for (Group g : result) { g.rect = new Grib2Rectilyser(tables, g.records, g.gdsHash, intvMerge); f.format(" GDS hash %d == ", g.gdsHash); g.rect.make(f, c, filenames); } f.format( " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n", c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records); return result; }