/** * Builds Collections of Grib1 Time Partitioned. * * @author caron * @since 1/7/12 */ public class Grib1TimePartitionBuilder extends Grib1CollectionBuilder { public static final String MAGIC_START = "Grib1Partition0Index"; private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Grib1TimePartitionBuilder.class); // static private final int versionTP = 4; private static final boolean trace = false; // called by tdm public static boolean update(TimePartitionCollection tpc, Formatter f) throws IOException { Grib1TimePartitionBuilder builder = new Grib1TimePartitionBuilder(tpc.getCollectionName(), new File(tpc.getRoot()), tpc); if (!builder.needsUpdate()) return false; builder.readOrCreateIndex(CollectionManager.Force.always, f); builder.gc.close(); return true; } // read in the index, create if it doesnt exist or is out of date public static Grib1TimePartition factory( TimePartitionCollection tpc, CollectionManager.Force force, Formatter f) throws IOException { Grib1TimePartitionBuilder builder = new Grib1TimePartitionBuilder(tpc.getCollectionName(), new File(tpc.getRoot()), tpc); builder.readOrCreateIndex(force, f); return builder.tp; } // read in the index, index raf already open public static Grib1TimePartition createFromIndex( String name, File directory, RandomAccessFile raf) throws IOException { Grib1TimePartitionBuilder builder = new Grib1TimePartitionBuilder(name, directory, null); if (builder.readIndex(raf)) { return builder.tp; } throw new IOException("Reading index failed"); } /** * write new index if needed * * @param tpc use this collection * @param force force index * @param f put status messagess here * @return true if index was written * @throws IOException on error */ public static boolean writeIndexFile( TimePartitionCollection tpc, CollectionManager.Force force, Formatter f) throws IOException { Grib1TimePartitionBuilder builder = null; try { builder = new Grib1TimePartitionBuilder(tpc.getCollectionName(), new 
File(tpc.getRoot()), tpc); return builder.readOrCreateIndex(force, f); } finally { if ((builder != null) && (builder.tp != null)) builder.tp.close(); } } ////////////////////////////////////////////////////////////////////////////////// private final TimePartitionCollection tpc; // defines the partition private final Grib1TimePartition tp; // build this object private Grib1TimePartitionBuilder(String name, File directory, TimePartitionCollection tpc) { FeatureCollectionConfig.GribConfig config = (tpc == null) ? null : (FeatureCollectionConfig.GribConfig) tpc.getAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG); this.tp = new Grib1TimePartition(name, directory, config); this.gc = tp; this.tpc = tpc; } private boolean readOrCreateIndex(CollectionManager.Force ff, Formatter f) throws IOException { File idx = gc.getIndexFile(); // force new index or test for new index needed boolean force = ((ff == CollectionManager.Force.always) || (ff == CollectionManager.Force.test && needsUpdate(idx.lastModified(), f))); // otherwise, we're good as long as the index file exists and can be read if (force || !idx.exists() || !readIndex(idx.getPath())) { logger.info("TimePartitionBuilder createIndex {}", idx.getPath()); createPartitionedIndex(f); // write out index readIndex(idx.getPath()); // read back in index return true; } return false; } private boolean needsUpdate(long collectionLastModified, Formatter f) throws IOException { CollectionManager.ChangeChecker cc = Grib1Index.getChangeChecker(); for (CollectionManager dcm : tpc.makePartitions()) { // LOOK not really right, since we dont know if these files are the // same as in the index File idxFile = GribCollection.getIndexFile(dcm); if (!idxFile.exists()) return true; if (collectionLastModified < idxFile.lastModified()) return true; for (MFile mfile : dcm.getFiles()) { if (cc.hasChangedSince(mfile, idxFile.lastModified())) return true; } } return false; } /////////////////////////////////////////////////// // create the index 
private boolean createPartitionedIndex(Formatter f) throws IOException { long start = System.currentTimeMillis(); // create partitions based on TimePartitionCollections object for (CollectionManager dcm : tpc.makePartitions()) { tp.addPartition(dcm); } List<TimePartition.Partition> bad = new ArrayList<TimePartition.Partition>(); for (TimePartition.Partition dc : tp.getPartitions()) { try { dc.makeGribCollection(f); // ensure collection has been read successfully if (trace) f.format(" Open partition %s%n", dc.getDcm().getCollectionName()); } catch (Throwable t) { logger.error(" Failed to open partition " + dc.getName(), t); f.format(" FAIL on partition %s (remove) %n", dc.getDcm().getCollectionName()); bad.add(dc); // LOOK may be a file leak ? } } // remove ones that failed for (TimePartition.Partition p : bad) tp.removePartition(p); // choose the "canonical" partition, aka prototype int n = tp.getPartitions().size(); if (n == 0) { logger.error(" Nothing in this partition = " + tp.getName()); f.format(" FAIL Partition empty collection = %s%n", tp.getName()); return false; } int idx = tpc.getProtoIndex(n); TimePartition.Partition canon = tp.getPartitions().get(idx); f.format(" Using canonical partition %s%n", canon.getDcm().getCollectionName()); // check consistency across vert and ens coords if (!checkPartitions(canon, f)) { logger.error( " Partition check failed, index not written on {} message = {}", tp.getName(), f.toString()); f.format(" FAIL Partition check collection = %s%n", tp.getName()); return false; } // make the time coordinates, place results into canon createPartitionedTimeCoordinates(canon, f); // ready to write the index file writeIndex(canon, f); // close open gc's tp.cleanup(); long took = System.currentTimeMillis() - start; f.format(" CreatePartitionedIndex took %d msecs%n", took); return true; } // consistency check on variables : compare each variable to corresponding one in proto // also set the groupno and partno for each partition private 
boolean checkPartitions(TimePartition.Partition canon, Formatter f) throws IOException { List<TimePartition.Partition> partitions = tp.getPartitions(); int npart = partitions.size(); boolean ok = true; // for each group in canonical Partition GribCollection canonGc = canon.makeGribCollection(f); for (GribCollection.GroupHcs firstGroup : canonGc.getGroups()) { String gname = firstGroup.getId(); if (trace) f.format(" Check Group %s%n", gname); // hash proto variables for quick lookup Map<Integer, GribCollection.VariableIndex> check = new HashMap<Integer, GribCollection.VariableIndex>(firstGroup.varIndex.size()); List<GribCollection.VariableIndex> varIndexP = new ArrayList<GribCollection.VariableIndex>(firstGroup.varIndex.size()); for (GribCollection.VariableIndex vi : firstGroup.varIndex) { TimePartition.VariableIndexPartitioned vip = tp.makeVariableIndexPartitioned(vi, npart); varIndexP.add(vip); check.put(vi.cdmHash, vip); // replace with its evil twin } firstGroup.varIndex = varIndexP; // replace with its evil twin // for each partition for (int partno = 0; partno < npart; partno++) { TimePartition.Partition tpp = partitions.get(partno); if (trace) f.format(" Check Partition %s%n", tpp.getName()); // get corresponding group GribCollection gc = tpp.makeGribCollection(f); int groupIdx = gc.findGroupIdxById(firstGroup.getId()); if (groupIdx < 0) { f.format(" Cant find group %s in partition %s%n", gname, tpp.getName()); ok = false; continue; } GribCollection.GroupHcs group = gc.getGroup(groupIdx); // for each variable in partition group for (int varIdx = 0; varIdx < group.varIndex.size(); varIdx++) { GribCollection.VariableIndex vi2 = group.varIndex.get(varIdx); if (trace) f.format(" Check variable %s%n", vi2); int flag = 0; GribCollection.VariableIndex vi1 = check.get(vi2.cdmHash); // compare with proto variable if (vi1 == null) { f.format( " WARN Cant find variable %s from %s in proto - ignoring that variable%n", vi2, tpp.getName()); continue; // we can tolerate 
this } // compare vert coordinates VertCoord vc1 = vi1.getVertCoord(); VertCoord vc2 = vi2.getVertCoord(); if ((vc1 == null) != (vc2 == null)) { f.format( " ERR Vert coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((vc1 != null) && !vc1.equalsData(vc2)) { f.format( " WARN Vert coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon vc = %s%n", vc1); f.format(" this vc = %s%n", vc2); flag |= TimePartition.VERT_COORDS_DIFFER; } // compare ens coordinates EnsCoord ec1 = vi1.getEnsCoord(); EnsCoord ec2 = vi2.getEnsCoord(); if ((ec1 == null) != (ec2 == null)) { f.format( " ERR Ensemble coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((ec1 != null) && !ec1.equalsData(ec2)) { f.format( " WARN Ensemble coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon ec = %s%n", ec1); f.format(" this ec = %s%n", ec2); flag |= TimePartition.ENS_COORDS_DIFFER; } ((TimePartition.VariableIndexPartitioned) vi1) .setPartitionIndex(partno, groupIdx, varIdx, flag); } // loop over variable } // loop over partition } // loop over group if (ok) f.format(" Partition check: vert, ens coords OK%n"); return ok; } private class PartGroup { GribCollection.GroupHcs group; TimePartition.Partition tpp; private PartGroup(GribCollection.GroupHcs group, TimePartition.Partition tpp) { this.group = group; this.tpp = tpp; } } private boolean createPartitionedTimeCoordinates(TimePartition.Partition canon, Formatter f) throws IOException { List<TimePartition.Partition> partitions = tp.getPartitions(); boolean ok = true; // for each group in canonical Partition for (GribCollection.GroupHcs firstGroup : canon.makeGribCollection(f).getGroups()) { String gname = firstGroup.getId(); if (trace) f.format(" Check Group %s%n", gname); // get list of corresponding groups from all the time partition, so we dont have to keep // looking it up 
List<PartGroup> pgList = new ArrayList<PartGroup>(partitions.size()); for (TimePartition.Partition dc : partitions) { GribCollection.GroupHcs gg = dc.makeGribCollection(f).findGroupById(gname); if (gg == null) logger.error(" Cant find group {} in partition {}", gname, dc.getName()); else pgList.add(new PartGroup(gg, dc)); } // unique time coordinate unions List<TimeCoordUnion> unionList = new ArrayList<TimeCoordUnion>(); // for each variable in canonical Partition for (GribCollection.VariableIndex viCanon : firstGroup.varIndex) { if (trace) f.format(" Check variable %s%n", viCanon); TimeCoord tcCanon = viCanon.getTimeCoord(); List<TimeCoord> tcPartitions = new ArrayList<TimeCoord>(pgList.size()); // for each partition, get the time index for (PartGroup pg : pgList) { // get corresponding variable GribCollection.VariableIndex vi2 = pg.group.findVariableByHash(viCanon.cdmHash); if (vi2 == null) { // apparently not in the file f.format( " WARN Cant find variable %s in partition %s / %s%n", viCanon, pg.tpp.getName(), pg.group.getId()); tcPartitions.add(null); } else { if (vi2.timeIdx < 0 || vi2.timeIdx >= pg.group.timeCoords.size()) { logger.error(" timeIdx out of range var= {} on partition {}", vi2, pg.tpp.getName()); tcPartitions.add(null); } else { TimeCoord tc2 = vi2.getTimeCoord(); if (tc2.isInterval() != tcCanon.isInterval()) { logger.error( " timeIdx wrong interval type var= {} on partition {}", vi2, pg.tpp.getName()); tcPartitions.add(null); } else { tcPartitions.add(tc2); } } } } // union of time coordinates TimeCoordUnion union = new TimeCoordUnion(tcCanon.getCode(), tcPartitions, tcCanon); // store result in the first group viCanon.partTimeCoordIdx = TimeCoordUnion.findUnique(unionList, union); // this merges identical TimeCoordUnion } /* turn TimeIndex into TimeCoord for (int tidx = 0; tidx <unionList.size(); tidx++) { TimeCoordUnion union = unionList.get(tidx); f.format(" %s %d: timeIndexList=", firstGroup.hcs.getName(), tidx); for (int idx : union.) 
f.format("%d,",idx); f.format("%n"); } */ // store results in first group firstGroup.timeCoordPartitions = unionList; } return ok; } ////////////////////////////////////////////////////////// @Override public String getMagicStart() { return MAGIC_START; } // writing ncx /* MAGIC_START version sizeRecords VariableRecords (sizeRecords bytes) sizeIndex GribCollectionIndex (sizeIndex bytes) */ private boolean writeIndex(TimePartition.Partition canon, Formatter f) throws IOException { File file = tp.getIndexFile(); if (file.exists()) { if (!file.delete()) logger.error("Cant delete " + file.getPath()); } RandomAccessFile raf = new RandomAccessFile(file.getPath(), "rw"); raf.order(RandomAccessFile.BIG_ENDIAN); try { //// header message raf.write(MAGIC_START.getBytes("UTF-8")); raf.writeInt(version); raf.writeLong(0); // no record section GribCollectionProto.GribCollectionIndex.Builder indexBuilder = GribCollectionProto.GribCollectionIndex.newBuilder(); indexBuilder.setName(tp.getName()); GribCollection canonGc = canon.makeGribCollection(f); for (GribCollection.GroupHcs g : canonGc.getGroups()) indexBuilder.addGroups(writeGroupProto(g)); indexBuilder.setCenter(canonGc.getCenter()); indexBuilder.setSubcenter(canonGc.getSubcenter()); indexBuilder.setMaster(canonGc.getMaster()); indexBuilder.setLocal(canonGc.getLocal()); for (TimePartition.Partition p : tp.getPartitions()) { indexBuilder.addPartitions(writePartitionProto(p.getName(), (TimePartition.Partition) p)); } GribCollectionProto.GribCollectionIndex index = indexBuilder.build(); byte[] b = index.toByteArray(); NcStream.writeVInt(raf, b.length); // message size raf.write(b); // message - all in one gulp f.format("GribCollectionTimePartitionedIndex= %d bytes%n", b.length); } finally { f.format("file size = %d bytes%n", raf.length()); raf.close(); } return true; } private GribCollectionProto.Group writeGroupProto(GribCollection.GroupHcs g) throws IOException { GribCollectionProto.Group.Builder b = 
GribCollectionProto.Group.newBuilder(); b.setGds(ByteString.copyFrom(g.rawGds)); b.setGdsHash(g.gdsHash); for (GribCollection.VariableIndex vb : g.varIndex) b.addVariables(writeVariableProto((TimePartition.VariableIndexPartitioned) vb)); for (int i = 0; i < g.timeCoordPartitions.size(); i++) b.addTimeCoordUnions(writeTimeCoordUnionProto(g.timeCoordPartitions.get(i), i)); List<VertCoord> vertCoords = g.vertCoords; for (int i = 0; i < vertCoords.size(); i++) b.addVertCoords(writeCoordProto(vertCoords.get(i), i)); List<EnsCoord> ensCoords = g.ensCoords; for (int i = 0; i < ensCoords.size(); i++) b.addEnsCoords(writeCoordProto(ensCoords.get(i), i)); return b.build(); } private GribCollectionProto.Variable writeVariableProto(TimePartition.VariableIndexPartitioned v) throws IOException { GribCollectionProto.Variable.Builder b = GribCollectionProto.Variable.newBuilder(); b.setDiscipline(v.discipline); b.setCategory(v.category); b.setParameter(v.parameter); b.setLevelType(v.levelType); b.setIsLayer(v.isLayer); b.setIntervalType(v.intvType); if (v.intvName != null) b.setIntvName(v.intvName); b.setCdmHash(v.cdmHash); b.setRecordsPos(0); b.setRecordsLen(0); b.setTimeIdx(v.partTimeCoordIdx); // note if (v.vertIdx >= 0) b.setVertIdx(v.vertIdx); if (v.ensIdx >= 0) b.setEnsIdx(v.ensIdx); if (v.ensDerivedType >= 0) b.setEnsDerivedType(v.ensDerivedType); // derived type (table 4.7) if (v.probabilityName != null) b.setProbabilityName(v.probabilityName); if (v.probType >= 0) b.setProbabilityType(v.probType); for (int idx : v.groupno) b.addGroupno(idx); for (int idx : v.varno) b.addVarno(idx); for (int idx : v.flag) b.addFlag(idx); return b.build(); } protected GribCollectionProto.TimeCoordUnion writeTimeCoordUnionProto( TimeCoordUnion tcu, int index) throws IOException { GribCollectionProto.TimeCoordUnion.Builder b = GribCollectionProto.TimeCoordUnion.newBuilder(); b.setCode(index); b.setUnit(tcu.getUnits()); if (tcu.isInterval()) { for (TimeCoord.Tinv tinv : tcu.getIntervals()) { 
b.addValues((float) tinv.getBounds1()); b.addBound((float) tinv.getBounds2()); } } else { for (int value : tcu.getCoords()) b.addValues((float) value); } for (TimeCoordUnion.Val val : tcu.getValues()) { b.addPartition(val.getPartition()); b.addIndex(val.getIndex()); } return b.build(); } private GribCollectionProto.Partition writePartitionProto(String name, TimePartition.Partition p) throws IOException { GribCollectionProto.Partition.Builder b = GribCollectionProto.Partition.newBuilder(); b.setFilename(p.getIndexFilename()); b.setName(name); return b.build(); } /////////////////////////////////////////////////////////////////////////// // reading ncx @Override protected boolean readPartitions(GribCollectionProto.GribCollectionIndex proto) { for (int i = 0; i < proto.getPartitionsCount(); i++) { GribCollectionProto.Partition pp = proto.getPartitions(i); tp.addPartition(pp.getName(), pp.getFilename()); } return proto.getPartitionsCount() > 0; } @Override protected void readTimePartitions( GribCollection.GroupHcs group, GribCollectionProto.Group proto) { List<TimeCoord> list = new ArrayList<TimeCoord>(proto.getTimeCoordUnionsCount()); for (int i = 0; i < proto.getTimeCoordUnionsCount(); i++) { GribCollectionProto.TimeCoordUnion tpu = proto.getTimeCoordUnions(i); list.add(readTimePartition(tpu, i)); } group.timeCoords = list; } /* protected TimeCoord readTimePartition(GribCollectionProto.TimeCoordUnion pc, int timeIndex) { int[] partition = new int[pc.getPartitionCount()]; int[] index = new int[pc.getPartitionCount()]; // better be the same for (int i = 0; i < pc.getPartitionCount(); i++) { partition[i] = pc.getPartition(i); index[i] = pc.getIndex(i); } if (pc.getBoundCount() > 0) { // its an interval List<TimeCoord.Tinv> coords = new ArrayList<TimeCoord.Tinv>(pc.getValuesCount()); for (int i = 0; i < pc.getValuesCount(); i++) coords.add(new TimeCoord.Tinv((int) pc.getValues(i), (int) pc.getBound(i))); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), 
coords, partition, index); return tc.setIndex( timeIndex); } else { List<Integer> coords = new ArrayList<Integer>(pc.getValuesCount()); for (float value : pc.getValuesList()) coords.add((int) value); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), coords, partition, index); return tc.setIndex( timeIndex); } } */ protected TimeCoord readTimePartition(GribCollectionProto.TimeCoordUnion pc, int timeIndex) { int[] partition = new int[pc.getPartitionCount()]; int[] index = new int[pc.getPartitionCount()]; // better be the same for (int i = 0; i < pc.getPartitionCount(); i++) { partition[i] = pc.getPartition(i); index[i] = pc.getIndex(i); } if (pc.getBoundCount() > 0) { // its an interval List<TimeCoord.Tinv> coords = new ArrayList<TimeCoord.Tinv>(pc.getValuesCount()); for (int i = 0; i < pc.getValuesCount(); i++) coords.add(new TimeCoord.Tinv((int) pc.getValues(i), (int) pc.getBound(i))); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), coords, partition, index); return tc.setIndex(timeIndex); } else { List<Integer> coords = new ArrayList<Integer>(pc.getValuesCount()); for (float value : pc.getValuesList()) coords.add((int) value); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), coords, partition, index); return tc.setIndex(timeIndex); } } @Override protected GribCollection.VariableIndex readVariable( GribCollectionProto.Variable pv, GribCollection.GroupHcs group) { int discipline = pv.getDiscipline(); int category = pv.getCategory(); int param = pv.getParameter(); int levelType = pv.getLevelType(); int intvType = pv.getIntervalType(); String intvName = pv.getIntvName(); boolean isLayer = pv.getIsLayer(); int ensDerivedType = pv.getEnsDerivedType(); int probType = pv.getProbabilityType(); String probabilityName = pv.getProbabilityName(); int cdmHash = pv.getCdmHash(); long recordsPos = pv.getRecordsPos(); int recordsLen = pv.getRecordsLen(); int timeIdx = pv.getTimeIdx(); int vertIdx = pv.getVertIdx(); int ensIdx = 
pv.getEnsIdx(); int tableVersion = pv.getTableVersion(); List<Integer> groupnoList = pv.getGroupnoList(); List<Integer> varnoList = pv.getVarnoList(); List<Integer> flagList = pv.getFlagList(); return tp.makeVariableIndex( group, tableVersion, discipline, category, param, levelType, isLayer, intvType, intvName, ensDerivedType, probType, probabilityName, -1, cdmHash, timeIdx, vertIdx, ensIdx, recordsPos, recordsLen, groupnoList, varnoList, flagList); } public static void main(String[] args) throws IOException { Formatter f = new Formatter(); String indexName = (args.length > 0) ? args[0] : "F:/nomads/NOMADS-cfsrr-timeseries.ncx"; RandomAccessFile raf = new RandomAccessFile(indexName, "r"); Grib1TimePartition gtc = Grib1TimePartitionBuilder.createFromIndex("test", null, raf); gtc.showIndex(f); System.out.printf("%s%n", f); } }
/** * Build a GribCollection object. Rectilyse and manage grib collection index. Covers * GribCollectionProto. * * @author caron * @since 4/6/11 */ public class Grib2CollectionBuilder { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(GribCollection.class); public static final String MAGIC_START = "Grib2CollectionIndex"; protected static final int version = 5; private static final boolean debug = false; // from a single file, read in the index, create if it doesnt exist public static GribCollection createFromSingleFile( File file, CollectionManager.Force force, Formatter f) throws IOException { Grib2CollectionBuilder builder = new Grib2CollectionBuilder(file, f); builder.readOrCreateIndex(force, f); return builder.gc; } // from a collection, read in the index, create if it doesnt exist or is out of date // assume that the CollectionManager is up to date, eg doesnt need to be scanned public static GribCollection factory( CollectionManager dcm, CollectionManager.Force force, Formatter f) throws IOException { Grib2CollectionBuilder builder = new Grib2CollectionBuilder(dcm); builder.readOrCreateIndex(force, f); return builder.gc; } // read in the index, index raf already open public static GribCollection createFromIndex(String name, File directory, RandomAccessFile raf) throws IOException { Grib2CollectionBuilder builder = new Grib2CollectionBuilder(name, directory); if (builder.readIndex(raf)) return builder.gc; throw new IOException("Reading index failed"); } // this writes the index always public static boolean writeIndexFile(File indexFile, CollectionManager dcm, Formatter f) throws IOException { Grib2CollectionBuilder builder = new Grib2CollectionBuilder(dcm); return builder.createIndex(indexFile, CollectionManager.Force.always, f); } //////////////////////////////////////////////////////////////// private final List<CollectionManager> collections = new ArrayList<CollectionManager>(); protected GribCollection gc; // single file 
private Grib2CollectionBuilder(File file, Formatter f) throws IOException { try { // String spec = StringUtil2.substitute(file.getPath(), "\\", "/"); CollectionManager dcm = new DatasetCollectionSingleFile(file); this.collections.add(dcm); this.gc = new Grib2Collection(file.getName(), new File(dcm.getRoot())); } catch (Exception e) { ByteArrayOutputStream bos = new ByteArrayOutputStream(10000); e.printStackTrace(new PrintStream(bos)); f.format("%s", bos.toString()); throw new IOException(e); } } private Grib2CollectionBuilder(CollectionManager dcm) { this.collections.add(dcm); this.gc = new Grib2Collection(dcm.getCollectionName(), new File(dcm.getRoot())); } private Grib2CollectionBuilder(String name, File directory) { this.gc = new Grib2Collection(name, directory); } protected Grib2CollectionBuilder() { this.gc = null; } protected int getVersion() { return version; } // read or create index private void readOrCreateIndex(CollectionManager.Force ff, Formatter f) throws IOException { // force new index or test for new index needed boolean force = ((ff == CollectionManager.Force.always) || (ff == CollectionManager.Force.test && needsUpdate())); // otherwise, we're good as long as the index file exists File idx = gc.getIndexFile(); if (force || !idx.exists() || !readIndex(idx.getPath())) { logger.info("GribCollection {}: createIndex {}", gc.getName(), idx.getPath()); createIndex(idx, ff, f); // write out index gc.rafLocation = idx.getPath(); gc.setRaf(new RandomAccessFile(idx.getPath(), "r")); readIndex(gc.getRaf()); // read back in index } } public boolean needsUpdate() { File idx = gc.getIndexFile(); return !idx.exists() || needsUpdate(idx.lastModified()); } private boolean needsUpdate(long idxLastModified) { CollectionManager.ChangeChecker cc = Grib2Index.getChangeChecker(); for (CollectionManager dcm : collections) { for (MFile mfile : dcm.getFiles()) { if (cc.hasChangedSince(mfile, idxLastModified)) return true; } } return false; } 
//////////////////////////////////////////////////////////////////////////////////////////////////// // reading public boolean readIndex(String filename) throws IOException { return readIndex(new RandomAccessFile(filename, "r")); } public boolean readIndex(RandomAccessFile raf) { gc.setRaf(raf); // LOOK leaving the raf open in the GribCollection try { raf.order(RandomAccessFile.BIG_ENDIAN); raf.seek(0); //// header message if (!NcStream.readAndTest(raf, MAGIC_START.getBytes())) { logger.error("GribCollection {}: invalid index", gc.getName()); return false; } int v = raf.readInt(); if (v != getVersion()) { logger.warn( "GribCollection {}: index found version={}, want version= {} on file {}", new Object[] {gc.getName(), v, version, raf.getLocation()}); return false; } long skip = raf.readLong(); raf.skipBytes(skip); int size = NcStream.readVInt(raf); if ((size < 0) || (size > 100 * 1000 * 1000)) { logger.warn("GribCollection {}: invalid index ", gc.getName()); return false; } byte[] m = new byte[size]; raf.readFully(m); GribCollectionProto.GribCollectionIndex proto = GribCollectionProto.GribCollectionIndex.parseFrom(m); gc.center = proto.getCenter(); gc.subcenter = proto.getSubcenter(); gc.master = proto.getMaster(); gc.local = proto.getLocal(); gc.genProcessType = proto.getGenProcessType(); gc.genProcessId = proto.getGenProcessId(); gc.backProcessId = proto.getBackProcessId(); gc.local = proto.getLocal(); // gc.tables = Grib2Tables.factory(gc.center, gc.subcenter, gc.master, gc.local); gc.filenames = new ArrayList<String>(proto.getFilesCount()); for (int i = 0; i < proto.getFilesCount(); i++) gc.filenames.add(proto.getFiles(i)); // error condition on a GribCollection Index if ((proto.getFilesCount() == 0) && !(this instanceof TimePartitionBuilder)) { logger.warn("GribCollection {}: has no files, force recreate ", gc.getName()); return false; } gc.groups = new ArrayList<GribCollection.GroupHcs>(proto.getGroupsCount()); for (int i = 0; i < proto.getGroupsCount(); i++) 
gc.groups.add(readGroup(proto.getGroups(i), gc.makeGroup())); Collections.sort(gc.groups); gc.params = new ArrayList<Parameter>(proto.getParamsCount()); for (int i = 0; i < proto.getParamsCount(); i++) gc.params.add(readParam(proto.getParams(i))); if (!readPartitions(proto)) { logger.warn("TimePartition {}: has no partitions, force recreate ", gc.getName()); return false; } return true; } catch (Throwable t) { logger.error("Error reading index " + raf.getLocation(), t); return false; } } protected boolean readPartitions(GribCollectionProto.GribCollectionIndex proto) { return true; } protected void readTimePartitions( GribCollection.GroupHcs group, GribCollectionProto.Group proto) { // NOOP } GribCollection.GroupHcs readGroup(GribCollectionProto.Group p, GribCollection.GroupHcs group) throws IOException { Grib2SectionGridDefinition gdss = new Grib2SectionGridDefinition(p.getGds().toByteArray()); Grib2Gds gds = gdss.getGDS(); group.setHorizCoordSystem(gds.makeHorizCoordSys()); group.varIndex = new ArrayList<GribCollection.VariableIndex>(); for (int i = 0; i < p.getVariablesCount(); i++) group.varIndex.add(readVariable(p.getVariables(i), group)); Collections.sort(group.varIndex); group.timeCoords = new ArrayList<TimeCoord>(p.getTimeCoordsCount()); for (int i = 0; i < p.getTimeCoordsCount(); i++) group.timeCoords.add(readTimeCoord(p.getTimeCoords(i))); group.vertCoords = new ArrayList<VertCoord>(p.getVertCoordsCount()); for (int i = 0; i < p.getVertCoordsCount(); i++) group.vertCoords.add(readVertCoord(p.getVertCoords(i))); group.ensCoords = new ArrayList<EnsCoord>(p.getEnsCoordsCount()); for (int i = 0; i < p.getEnsCoordsCount(); i++) group.ensCoords.add(readEnsCoord(p.getEnsCoords(i))); group.filenose = new int[p.getFilenoCount()]; for (int i = 0; i < p.getFilenoCount(); i++) group.filenose[i] = p.getFileno(i); readTimePartitions(group, p); // finish for (GribCollection.VariableIndex vi : group.varIndex) { TimeCoord tc = group.timeCoords.get(vi.timeIdx); vi.ntimes = 
tc.getSize();
      VertCoord vc = (vi.vertIdx < 0) ? null : group.vertCoords.get(vi.vertIdx);
      vi.nverts = (vc == null) ? 0 : vc.getSize();
      EnsCoord ec = (vi.ensIdx < 0) ? null : group.ensCoords.get(vi.ensIdx);
      vi.nens = (ec == null) ? 0 : ec.getSize();
    }

    // group.assignVertNames();

    return group;
  }

  /**
   * Converts a protobuf Parameter into a {@link Parameter}.
   *
   * @param pp the stored parameter
   * @return a string-valued Parameter when the message has sdata, otherwise a Parameter holding
   *     the message's numeric data as a double array
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private Parameter readParam(GribCollectionProto.Parameter pp) throws IOException {
    if (pp.hasSdata()) {
      return new Parameter(pp.getName(), pp.getSdata());
    }

    int count = 0;
    double[] vals = new double[pp.getDataCount()];
    for (double val : pp.getDataList()) {
      vals[count++] = val;
    }
    return new Parameter(pp.getName(), vals);
  }

  /**
   * Converts a protobuf Coord into a {@link TimeCoord}.
   *
   * <p>When the message carries bounds it is read as a list of intervals (value, bound); otherwise
   * as a list of integer coordinates. Stored float values are truncated to int.
   *
   * @param pc the stored coordinate
   * @return the time coordinate
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private TimeCoord readTimeCoord(GribCollectionProto.Coord pc) throws IOException {
    if (pc.getBoundCount() > 0) { // its an interval
      List<TimeCoord.Tinv> coords = new ArrayList<TimeCoord.Tinv>(pc.getValuesCount());
      for (int i = 0; i < pc.getValuesCount(); i++) {
        coords.add(new TimeCoord.Tinv((int) pc.getValues(i), (int) pc.getBound(i)));
      }
      return new TimeCoord(pc.getCode(), pc.getUnit(), coords);

    } else {
      List<Integer> coords = new ArrayList<Integer>(pc.getValuesCount());
      for (float value : pc.getValuesList()) {
        coords.add((int) value);
      }
      return new TimeCoord(pc.getCode(), pc.getUnit(), coords);
    }
  }

  /**
   * Converts a protobuf Coord into a {@link VertCoord}.
   *
   * <p>The coordinate is treated as a layer when the message carries any bounds; for non-layers the
   * second level value is set to 0.
   *
   * @param pc the stored coordinate
   * @return the vertical coordinate
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private VertCoord readVertCoord(GribCollectionProto.Coord pc) throws IOException {
    boolean isLayer = (pc.getBoundCount() > 0);
    List<VertCoord.Level> coords = new ArrayList<VertCoord.Level>(pc.getValuesCount());
    for (int i = 0; i < pc.getValuesCount(); i++) {
      coords.add(new VertCoord.Level(pc.getValues(i), isLayer ? pc.getBound(i) : 0));
    }
    return new VertCoord(pc.getCode(), coords, isLayer);
  }

  /**
   * Converts a protobuf Coord into an {@link EnsCoord}.
   *
   * <p>Values are consumed two at a time; this mirrors writeCoordProto(EnsCoord, int), which
   * appends the code followed by the ensemble member for each coordinate.
   *
   * @param pc the stored coordinate
   * @return the ensemble coordinate
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private EnsCoord readEnsCoord(GribCollectionProto.Coord pc) throws IOException {
    List<EnsCoord.Coord> coords = new ArrayList<EnsCoord.Coord>(pc.getValuesCount());
    for (int i = 0; i < pc.getValuesCount(); i += 2) {
      coords.add(new EnsCoord.Coord((int) pc.getValues(i), (int) pc.getValues(i + 1)));
    }
    return new EnsCoord(coords);
  }

  /**
   * Builds a VariableIndex for the given group from a stored protobuf Variable.
   *
   * @param pv the stored variable
   * @param group the group the variable belongs to
   * @return the result of gc.makeVariableIndex for the fields read from pv
   */
  protected GribCollection.VariableIndex readVariable(
      GribCollectionProto.Variable pv, GribCollection.GroupHcs group) {
    int discipline = pv.getDiscipline();
    int category = pv.getCategory();
    int param = pv.getParameter();
    int levelType = pv.getLevelType();
    int intvType = pv.getIntervalType();
    boolean isLayer = pv.getIsLayer();
    int ensDerivedType = pv.getEnsDerivedType();
    int probType = pv.getProbabilityType();
    String probabilityName = pv.getProbabilityName();
    int cdmHash = pv.getCdmHash();
    long recordsPos = pv.getRecordsPos();
    int recordsLen = pv.getRecordsLen();
    int timeIdx = pv.getTimeIdx();
    int vertIdx = pv.getVertIdx();
    int ensIdx = pv.getEnsIdx();
    int tableVersion = pv.getTableVersion();

    return gc.makeVariableIndex(
        group,
        tableVersion,
        discipline,
        category,
        param,
        levelType,
        isLayer,
        intvType,
        ensDerivedType,
        probType,
        probabilityName,
        cdmHash,
        timeIdx,
        vertIdx,
        ensIdx,
        recordsPos,
        recordsLen);
  }

  ///////////////////////////////////////////////////////////////////////////////////
  // writing

  /** All records that share one GDS hash, plus the rectilyser run over them. */
  private class Group {
    public Grib2SectionGridDefinition gdss;
    public int gdsHash; // may have been modified
    public Grib2Rectilyser rect;
    public List<Grib2Record> records = new ArrayList<Grib2Record>();
    public String name;
    public Set<Integer> fileSet; // this is so we can show just the component files that are in this group

    private Group(Grib2SectionGridDefinition gdss, int gdsHash) {
      this.gdss = gdss;
      this.gdsHash = gdsHash;

      Grib2Gds gds = gdss.getGDS();
      name = gds.getNameShort() + "-" + gds.ny + "X" + gds.nx;
    }
  }

  ///////////////////////////////////////////////////
  // create the index

  /**
   * Aggregates all records into groups and writes the collection index file.
   *
   * @param indexFile the index file to write
   * @param ff passed through to the per-file index read
   * @param f status messages are appended here
   * @return always true
   * @throws IOException on read or write error
   */
  private boolean createIndex(File indexFile, CollectionManager.Force ff, Formatter f)
      throws IOException {
    long start = System.currentTimeMillis();

    ArrayList<String> filenames = new ArrayList<String>();
    List<Group> groups = makeAggregatedGroups(filenames, ff, f);
    createIndex(indexFile, groups, filenames, f);

    long took = System.currentTimeMillis() - start;
    f.format("That took %d msecs%n", took);
    return true;
  }

  /**
   * Reads all records in all files and divides them into groups based on GDS hash. Each group has
   * a list of all records that belong to it; for each group the rectilyser is run to derive the
   * coordinates and variables.
   *
   * <p>Every file visited is appended to {@code filenames}, including files whose index could not
   * be read or created (those are reported in {@code f} and contribute no records). The file
   * number stored on each record is assigned in step with those appends.
   *
   * @param filenames receives the path of every file visited, in visiting order
   * @param force passed to the per-file index read
   * @param f status messages are appended here
   * @return one Group per distinct (possibly remapped) GDS hash
   * @throws IOException on error
   */
  public List<Group> makeAggregatedGroups(
      ArrayList<String> filenames, CollectionManager.Force force, Formatter f) throws IOException {
    Map<Integer, Group> gdsMap = new HashMap<Integer, Group>();

    f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName());
    int total = 0;
    int fileno = 0;
    for (CollectionManager dcm : collections) {
      // dcm.scanIfNeeded(); // LOOK ??
      f.format(" dcm= %s%n", dcm);
      @SuppressWarnings("unchecked") // aux info is untyped; the original code performs the same cast
      Map<Integer, Integer> gdsConvert = (Map<Integer, Integer>) dcm.getAuxInfo("gdsHash");

      for (MFile mfile : dcm.getFiles()) {
        // f.format("%3d: %s%n", fileno, mfile.getPath());
        filenames.add(mfile.getPath());
        // Take the file number at the same moment the name is added, so that a file skipped by
        // the catch below still consumes its slot and later files keep matching numbers.
        int thisFileno = fileno++;

        Grib2Index index = new Grib2Index();
        try {
          // heres where the index date is checked against the data file
          if (!index.readIndex(mfile.getPath(), mfile.getLastModified(), force)) {
            index.makeIndex(mfile.getPath(), f);
            f.format(
                " Index written: %s == %d records %n",
                mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
          } else if (debug) {
            f.format(
                " Index read: %s == %d records %n",
                mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
          }
        } catch (IOException ioe) {
          f.format(
              "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n skipping %s%n",
              ioe.getMessage(), mfile.getPath() + Grib2Index.IDX_EXT);
          continue;
        }

        for (Grib2Record gr : index.getRecords()) {
          gr.setFile(thisFileno); // each record tracks which file it belongs to
          int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records
          // allow external config to muck with gdsHash. Why? because of error in encoding
          if (gdsConvert != null && gdsConvert.get(gdsHash) != null) {
            gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching
          }

          Group g = gdsMap.get(gdsHash);
          if (g == null) {
            g = new Group(gr.getGDSsection(), gdsHash);
            gdsMap.put(gdsHash, g);
          }
          g.records.add(gr);
          total++;
        }
      }
    }
    f.format(" total grib records= %d%n", total);

    Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter();
    List<Group> result = new ArrayList<Group>(gdsMap.values());
    for (Group g : result) {
      g.rect = new Grib2Rectilyser(g.records, g.gdsHash);
      f.format(" GDS hash %d == ", g.gdsHash);
      g.rect.make(f, c);
    }
    f.format(
        " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n",
        c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records);

    return result;
  }

  /*
   MAGIC_START
   version
   sizeRecords
   VariableRecords (sizeRecords bytes)
   sizeIndex
   GribCollectionIndex (sizeIndex bytes)
   */

  /**
   * Writes the collection index file in the layout sketched in the comment above: magic string,
   * version, length of the record section, the VariableRecords messages, then the
   * GribCollectionIndex message preceded by its size.
   *
   * <p>An existing file at {@code indexFile} is deleted first. The file is closed exactly once,
   * even when reporting its size fails.
   *
   * @param indexFile the index file to write
   * @param groups groups whose rectilyser has already been run
   * @param filenames file paths stored in the index, in file-number order
   * @param f status messages are appended here
   * @throws IOException on write error
   * @throws IllegalArgumentException if no group contains any variable
   */
  private void createIndex(
      File indexFile, List<Group> groups, ArrayList<String> filenames, Formatter f)
      throws IOException {
    Grib2Record first = null; // take global metadata from here

    if (indexFile.exists()) indexFile.delete(); // replace it
    f.format(" createIndex for %s%n", indexFile.getPath());

    RandomAccessFile raf = new RandomAccessFile(indexFile.getPath(), "rw");
    raf.order(RandomAccessFile.BIG_ENDIAN);
    try {
      //// header message
      raf.write(MAGIC_START.getBytes("UTF-8"));
      raf.writeInt(version);
      long lenPos = raf.getFilePointer();
      raf.writeLong(0); // save space to write the length of the record section
      long countBytes = 0;
      int countRecords = 0;
      for (Group g : groups) {
        g.fileSet = new HashSet<Integer>();
        for (Grib2Rectilyser.VariableBag vb : g.rect.getGribvars()) {
          if (first == null) first = vb.first;
          GribCollectionProto.VariableRecords vr = writeRecordsProto(vb, g.fileSet);
          byte[] b = vr.toByteArray();
          vb.pos = raf.getFilePointer();
          vb.length = b.length;
          raf.write(b);
          countBytes += b.length;
          countRecords += vb.recordMap.length;
        }
      }
      long bytesPerRecord = countBytes / ((countRecords == 0) ? 1 : countRecords);
      f.format(
          " write RecordMaps: bytes = %d record = %d bytesPerRecord=%d%n",
          countBytes, countRecords, bytesPerRecord);

      if (first == null) {
        logger.error("GribCollection {}: has no files\n{}", gc.getName(), f.toString());
        throw new IllegalArgumentException("GribCollection " + gc.getName() + " has no files");
      }

      long pos = raf.getFilePointer();
      raf.seek(lenPos);
      raf.writeLong(countBytes);
      raf.seek(pos); // back to the output.

      GribCollectionProto.GribCollectionIndex.Builder indexBuilder =
          GribCollectionProto.GribCollectionIndex.newBuilder();
      indexBuilder.setName(gc.getName());

      for (String fn : filenames) indexBuilder.addFiles(fn);

      for (Group g : groups) indexBuilder.addGroups(writeGroupProto(g));

      /* int count = 0;
      for (DatasetCollectionManager dcm : collections) {
        indexBuilder.addParams(makeParamProto(new Parameter("spec" + count, dcm.())));
        count++;
      } */

      // what about just storing first ??
      Grib2SectionIdentification ids = first.getId();
      indexBuilder.setCenter(ids.getCenter_id());
      indexBuilder.setSubcenter(ids.getSubcenter_id());
      indexBuilder.setMaster(ids.getMaster_table_version());
      indexBuilder.setLocal(ids.getLocal_table_version());

      Grib2Pds pds = first.getPDS();
      indexBuilder.setGenProcessType(pds.getGenProcessType());
      indexBuilder.setGenProcessId(pds.getGenProcessId());
      indexBuilder.setBackProcessId(pds.getBackProcessId());

      GribCollectionProto.GribCollectionIndex index = indexBuilder.build();
      byte[] b = index.toByteArray();
      NcStream.writeVInt(raf, b.length); // message size
      raf.write(b); // message - all in one gulp
      f.format(" write GribCollectionIndex= %d bytes%n", b.length);

    } finally {
      // Report the size, then close exactly once; the nested finally guarantees the close even
      // if raf.length() throws.
      try {
        f.format(" file size = %d bytes%n", raf.length());
      } finally {
        raf.close();
      }
    }
  }

  /* private void createIndexForGroup(Group group, ArrayList<String> filenames) throws IOException {
    Grib2Record first = null; // take global metadata from here

    File file = new File(gc.getDirectory(), group.name + GribCollection.IDX_EXT);
    if (file.exists()) file.delete(); // replace it

    RandomAccessFile raf = new RandomAccessFile(file.getPath(), "rw");
    raf.order(RandomAccessFile.BIG_ENDIAN);
    try {
      //// header message
      String magic = gc.getMagicBytes();
      raf.write(magic.getBytes("UTF-8"));
      raf.writeInt(version);
      long lenPos = raf.getFilePointer();
      raf.writeLong(0); // save space to write the length of the record section
      long countBytes = 0;
      int countRecords = 0;
      group.fileSet = new HashSet<Integer>();
      for (Rectilyser.VariableBag vb : group.rect.getGribvars()) {
        if (first == null) first = vb.first;
        GribCollectionProto.VariableRecords vr = makeRecordsProto(vb, group.fileSet);
        byte[] b = vr.toByteArray();
        vb.pos = raf.getFilePointer();
        vb.length = b.length;
        raf.write(b);
        countBytes += b.length;
      }
      countRecords += group.records.size();
      if (countRecords == 0) countRecords = 1;
      long bytesPerRecord = countBytes / countRecords;
      logger.debug("VariableRecords: bytes = {} record = {} bytesPerRecord={}", new Object[] {countBytes, countRecords, bytesPerRecord});

      long pos = raf.getFilePointer();
      raf.seek(lenPos);
      raf.writeLong(countBytes);
      raf.seek(pos); // back to the output.

      GribCollectionProto.GribCollectionIndex.Builder indexBuilder = GribCollectionProto.GribCollectionIndex.newBuilder();
      indexBuilder.setName(group.name);

      for (String fn : filenames) indexBuilder.addFiles(fn);

      indexBuilder.addGroups(makeGroupProto(group));

      int count = 0;
      for (CollectionManager dcm : collections) {
        indexBuilder.addParams(makeParamProto(new Parameter("spec" + count, dcm.toString())));
        count++;
      }

      Grib2SectionIdentification ids = first.getId();
      indexBuilder.setCenter(ids.getCenter_id());
      indexBuilder.setSubcenter(ids.getSubcenter_id());
      indexBuilder.setMaster(ids.getMaster_table_version());
      indexBuilder.setLocal(ids.getLocal_table_version());

      GribCollectionProto.GribCollectionIndex index = indexBuilder.build();
      byte[] b = index.toByteArray();
      NcStream.writeVInt(raf, b.length); // message size
      raf.write(b); // message - all in one gulp
      logger.debug("GribCollectionIndex= {} bytes%n", b.length);

    } finally {
      logger.debug("file size = {} bytes%n", raf.length());
      raf.close();
      if (raf != null) raf.close();
    }
  } */

  /**
   * Builds the VariableRecords message for one variable: one Record per slot of the variable's
   * record map. Missing slots are written with file number 0 and position 0.
   *
   * @param vb the variable whose records are written
   * @param fileSet receives the file number of every non-missing record
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private GribCollectionProto.VariableRecords writeRecordsProto(
      Grib2Rectilyser.VariableBag vb, Set<Integer> fileSet) throws IOException {
    GribCollectionProto.VariableRecords.Builder b =
        GribCollectionProto.VariableRecords.newBuilder();
    b.setCdmHash(vb.first.cdmVariableHash(0));
    for (Grib2Rectilyser.Record ar : vb.recordMap) {
      GribCollectionProto.Record.Builder br = GribCollectionProto.Record.newBuilder();

      if (ar == null || ar.gr == null) {
        br.setFileno(0);
        br.setPos(0); // missing : ok to use 0 since drsPos > 0

      } else {
        br.setFileno(ar.gr.getFile());
        fileSet.add(ar.gr.getFile());
        Grib2SectionDataRepresentation drs = ar.gr.getDataRepresentationSection();
        br.setPos(drs.getStartingPosition());
      }
      b.addRecords(br);
    }
    return b.build();
  }

  /**
   * Builds the Group message: raw GDS bytes, the group's variables, its time, vertical and
   * ensemble coordinates, and the file numbers collected in {@code g.fileSet}.
   *
   * @param g the group to write; its rect and fileSet must already be set
   * @return the built message
   * @throws IOException declared for callers; propagated from the helper writers
   */
  private GribCollectionProto.Group writeGroupProto(Group g) throws IOException {
    GribCollectionProto.Group.Builder b = GribCollectionProto.Group.newBuilder();

    b.setGds(ByteString.copyFrom(g.gdss.getRawBytes()));

    for (Grib2Rectilyser.VariableBag vb : g.rect.getGribvars()) {
      b.addVariables(writeVariableProto(vb));
    }

    List<TimeCoord> timeCoords = g.rect.getTimeCoords();
    for (int i = 0; i < timeCoords.size(); i++) {
      b.addTimeCoords(writeCoordProto(timeCoords.get(i), i));
    }

    List<VertCoord> vertCoords = g.rect.getVertCoords();
    for (int i = 0; i < vertCoords.size(); i++) {
      b.addVertCoords(writeCoordProto(vertCoords.get(i), i));
    }

    List<EnsCoord> ensCoords = g.rect.getEnsCoords();
    for (int i = 0; i < ensCoords.size(); i++) {
      b.addEnsCoords(writeCoordProto(ensCoords.get(i), i));
    }

    for (Integer aFileSet : g.fileSet) {
      b.addFileno(aFileSet);
    }

    return b.build();
  }

  /**
   * Builds the Variable message from the first record of a variable and the position/length of
   * its already-written VariableRecords section.
   *
   * <p>Vertical and ensemble indices are only set when non-negative; ensemble-derived and
   * probability fields are only set when the PDS reports those kinds.
   *
   * @param vb the variable to write
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  private GribCollectionProto.Variable writeVariableProto(Grib2Rectilyser.VariableBag vb)
      throws IOException {
    GribCollectionProto.Variable.Builder b = GribCollectionProto.Variable.newBuilder();

    b.setDiscipline(vb.first.getDiscipline());
    Grib2Pds pds = vb.first.getPDS();
    b.setCategory(pds.getParameterCategory());
    b.setParameter(pds.getParameterNumber());
    b.setLevelType(pds.getLevelType1());
    b.setIsLayer(Grib2Utils.isLayer(vb.first));
    b.setIntervalType(pds.getStatisticalProcessType());
    b.setCdmHash(vb.first.cdmVariableHash(0));

    b.setRecordsPos(vb.pos);
    b.setRecordsLen(vb.length);
    b.setTimeIdx(vb.timeCoordIndex);
    if (vb.vertCoordIndex >= 0) b.setVertIdx(vb.vertCoordIndex);
    if (vb.ensCoordIndex >= 0) b.setEnsIdx(vb.ensCoordIndex);

    if (pds.isEnsembleDerived()) {
      Grib2Pds.PdsEnsembleDerived pdsDerived = (Grib2Pds.PdsEnsembleDerived) pds;
      b.setEnsDerivedType(pdsDerived.getDerivedForecastType()); // derived type (table 4.7)
    }

    if (pds.isProbability()) {
      Grib2Pds.PdsProbability pdsProb = (Grib2Pds.PdsProbability) pds;
      b.setProbabilityName(pdsProb.getProbabilityName());
      b.setProbabilityType(pdsProb.getProbabilityType());
    }

    return b.build();
  }

  /**
   * Builds the Parameter message: the string value for string parameters, otherwise every numeric
   * value in order.
   *
   * @param param the parameter to write
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  protected GribCollectionProto.Parameter writeParamProto(Parameter param) throws IOException {
    GribCollectionProto.Parameter.Builder b = GribCollectionProto.Parameter.newBuilder();

    b.setName(param.getName());
    if (param.isString()) {
      b.setSdata(param.getStringValue());
    } else {
      for (int i = 0; i < param.getLength(); i++) {
        b.addData(param.getNumericValue(i));
      }
    }

    return b.build();
  }

  /**
   * Builds the Coord message for a time coordinate. The message code is set to {@code index}.
   * Interval coordinates write bounds1 as the value and bounds2 as the bound; all values are
   * multiplied by the coordinate's time unit scale.
   *
   * @param tc the time coordinate
   * @param index stored as the message code
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  protected GribCollectionProto.Coord writeCoordProto(TimeCoord tc, int index)
      throws IOException {
    GribCollectionProto.Coord.Builder b = GribCollectionProto.Coord.newBuilder();
    b.setCode(index);
    b.setUnit(tc.getUnits());
    float scale = (float) tc.getTimeUnitScale(); // deal with, eg, "6 hours" by multiplying values by 6
    if (tc.isInterval()) {
      for (TimeCoord.Tinv tinv : tc.getIntervals()) {
        b.addValues(tinv.getBounds1() * scale);
        b.addBound(tinv.getBounds2() * scale);
      }
    } else {
      for (int value : tc.getCoords()) {
        b.addValues(value * scale);
      }
    }
    return b.build();
  }

  /**
   * Builds the Coord message for a vertical coordinate. Layers write value1 as the value and
   * value2 as the bound; non-layers write value1 only.
   *
   * @param vc the vertical coordinate
   * @param index not used by this overload
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  protected GribCollectionProto.Coord writeCoordProto(VertCoord vc, int index)
      throws IOException {
    GribCollectionProto.Coord.Builder b = GribCollectionProto.Coord.newBuilder();
    b.setCode(vc.getCode());
    b.setUnit(vc.getUnits());
    for (VertCoord.Level coord : vc.getCoords()) {
      if (vc.isLayer()) {
        b.addValues((float) coord.getValue1());
        b.addBound((float) coord.getValue2());
      } else {
        b.addValues((float) coord.getValue1());
      }
    }
    return b.build();
  }

  /**
   * Builds the Coord message for an ensemble coordinate. Code is 0 and unit is empty; each
   * coordinate contributes two consecutive values, its code then its ensemble member.
   *
   * @param ec the ensemble coordinate
   * @param index not used by this overload
   * @return the built message
   * @throws IOException declared for callers; not thrown by the code visible here
   */
  protected GribCollectionProto.Coord writeCoordProto(EnsCoord ec, int index)
      throws IOException {
    GribCollectionProto.Coord.Builder b = GribCollectionProto.Coord.newBuilder();
    b.setCode(0);
    b.setUnit("");
    for (EnsCoord.Coord coord : ec.getCoords()) {
      b.addValues((float) coord.getCode());
      b.addValues((float) coord.getEnsMember());
    }
    return b.build();
  }
}