/**
 * Rebuilds a vertical coordinate from its protobuf representation.
 *
 * <p>The coordinate is treated as a layer when the message carries at least one bound; in that
 * case each level pairs value {@code k} with bound {@code k}. Otherwise the second level
 * argument is 0.
 *
 * @param pc the stored coordinate message
 * @return the vertical coordinate built from the message's code and levels
 * @throws IOException declared for symmetry with the other readers; not thrown directly here
 */
private VertCoord readVertCoord(GribCollectionProto.Coord pc) throws IOException {
  final int nvalues = pc.getValuesCount();
  final boolean isLayer = pc.getBoundCount() > 0;

  List<VertCoord.Level> levels = new ArrayList<VertCoord.Level>(nvalues);
  int k = 0;
  while (k < nvalues) {
    levels.add(new VertCoord.Level(pc.getValues(k), isLayer ? pc.getBound(k) : 0));
    k++;
  }

  return new VertCoord(pc.getCode(), levels, isLayer);
}
private boolean createPartitionedIndex(Formatter f) throws IOException { long start = System.currentTimeMillis(); // create partitions based on TimePartitionCollections object for (CollectionManager dcm : tpc.makePartitions()) { tp.addPartition(dcm); } List<TimePartition.Partition> bad = new ArrayList<TimePartition.Partition>(); for (TimePartition.Partition dc : tp.getPartitions()) { try { dc.makeGribCollection(f); // ensure collection has been read successfully if (trace) f.format(" Open partition %s%n", dc.getDcm().getCollectionName()); } catch (Throwable t) { logger.error(" Failed to open partition " + dc.getName(), t); f.format(" FAIL on partition %s (remove) %n", dc.getDcm().getCollectionName()); bad.add(dc); // LOOK may be a file leak ? } } // remove ones that failed for (TimePartition.Partition p : bad) tp.removePartition(p); // choose the "canonical" partition, aka prototype int n = tp.getPartitions().size(); if (n == 0) { logger.error(" Nothing in this partition = " + tp.getName()); f.format(" FAIL Partition empty collection = %s%n", tp.getName()); return false; } int idx = tpc.getProtoIndex(n); TimePartition.Partition canon = tp.getPartitions().get(idx); f.format(" Using canonical partition %s%n", canon.getDcm().getCollectionName()); // check consistency across vert and ens coords if (!checkPartitions(canon, f)) { logger.error( " Partition check failed, index not written on {} message = {}", tp.getName(), f.toString()); f.format(" FAIL Partition check collection = %s%n", tp.getName()); return false; } // make the time coordinates, place results into canon createPartitionedTimeCoordinates(canon, f); // ready to write the index file writeIndex(canon, f); // close open gc's tp.cleanup(); long took = System.currentTimeMillis() - start; f.format(" CreatePartitionedIndex took %d msecs%n", took); return true; }
/**
 * Reads every time coordinate union stored in the group message and installs the resulting
 * list as the group's time coordinates. Each union is tagged with its position in the message.
 *
 * @param group the group whose {@code timeCoords} field is replaced
 * @param proto the stored group message
 */
@Override
protected void readTimePartitions(
    GribCollection.GroupHcs group, GribCollectionProto.Group proto) {
  final int nunions = proto.getTimeCoordUnionsCount();
  List<TimeCoord> unions = new ArrayList<TimeCoord>(nunions);

  int timeIndex = 0;
  while (timeIndex < nunions) {
    unions.add(readTimePartition(proto.getTimeCoordUnions(timeIndex), timeIndex));
    timeIndex++;
  }

  group.timeCoords = unions;
}
/**
 * Serializes one group into its protobuf message: raw GDS bytes and hash, the variables, the
 * time / vertical / ensemble coordinates (each written with its list position), the file
 * numbers, and the name override when one is set.
 *
 * @param g the group to serialize
 * @return the built group message
 * @throws IOException if writing a variable or coordinate fails
 */
private GribCollectionProto.Group writeGroupProto(Group g) throws IOException {
  GribCollectionProto.Group.Builder builder = GribCollectionProto.Group.newBuilder();

  builder.setGds(ByteString.copyFrom(g.gdss.getRawBytes()));
  builder.setGdsHash(g.gdsHash);

  for (Grib2Rectilyser.VariableBag bag : g.rect.getGribvars()) {
    builder.addVariables(writeVariableProto(g.rect, bag));
  }

  // each coordinate is written together with its position in its own list
  List<TimeCoord> timeCoords = g.rect.getTimeCoords();
  int timeIdx = 0;
  for (TimeCoord tc : timeCoords) {
    builder.addTimeCoords(writeCoordProto(tc, timeIdx));
    timeIdx++;
  }

  List<VertCoord> vertCoords = g.rect.getVertCoords();
  int vertIdx = 0;
  for (VertCoord vc : vertCoords) {
    builder.addVertCoords(writeCoordProto(vc, vertIdx));
    vertIdx++;
  }

  List<EnsCoord> ensCoords = g.rect.getEnsCoords();
  int ensIdx = 0;
  for (EnsCoord ec : ensCoords) {
    builder.addEnsCoords(writeCoordProto(ec, ensIdx));
    ensIdx++;
  }

  for (Integer fileNumber : g.fileSet) {
    builder.addFileno(fileNumber);
  }

  if (g.nameOverride != null) {
    builder.setName(g.nameOverride);
  }

  return builder.build();
}
private TimeCoord readTimeCoord(GribCollectionProto.Coord pc) throws IOException { if (pc.getBoundCount() > 0) { // its an interval List<TimeCoord.Tinv> coords = new ArrayList<TimeCoord.Tinv>(pc.getValuesCount()); for (int i = 0; i < pc.getValuesCount(); i++) coords.add(new TimeCoord.Tinv((int) pc.getValues(i), (int) pc.getBound(i))); return new TimeCoord(pc.getCode(), pc.getUnit(), coords); } else { List<Integer> coords = new ArrayList<Integer>(pc.getValuesCount()); for (float value : pc.getValuesList()) coords.add((int) value); return new TimeCoord(pc.getCode(), pc.getUnit(), coords); } }
protected TimeCoord readTimePartition(GribCollectionProto.TimeCoordUnion pc, int timeIndex) { int[] partition = new int[pc.getPartitionCount()]; int[] index = new int[pc.getPartitionCount()]; // better be the same for (int i = 0; i < pc.getPartitionCount(); i++) { partition[i] = pc.getPartition(i); index[i] = pc.getIndex(i); } if (pc.getBoundCount() > 0) { // its an interval List<TimeCoord.Tinv> coords = new ArrayList<TimeCoord.Tinv>(pc.getValuesCount()); for (int i = 0; i < pc.getValuesCount(); i++) coords.add(new TimeCoord.Tinv((int) pc.getValues(i), (int) pc.getBound(i))); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), coords, partition, index); return tc.setIndex(timeIndex); } else { List<Integer> coords = new ArrayList<Integer>(pc.getValuesCount()); for (float value : pc.getValuesList()) coords.add((int) value); TimeCoordUnion tc = new TimeCoordUnion(pc.getCode(), pc.getUnit(), coords, partition, index); return tc.setIndex(timeIndex); } }
/**
 * Serializes one partitioned group into its protobuf message: raw GDS bytes and hash, the
 * partitioned variables, the time coordinate unions, and the vertical and ensemble
 * coordinates. Unions and coordinates are written together with their list position.
 *
 * @param g the group to serialize; its variables are cast to
 *     {@code TimePartition.VariableIndexPartitioned}
 * @return the built group message
 * @throws IOException if writing a variable or coordinate fails
 */
private GribCollectionProto.Group writeGroupProto(GribCollection.GroupHcs g)
    throws IOException {
  GribCollectionProto.Group.Builder builder = GribCollectionProto.Group.newBuilder();

  builder.setGds(ByteString.copyFrom(g.rawGds));
  builder.setGdsHash(g.gdsHash);

  for (GribCollection.VariableIndex vi : g.varIndex) {
    TimePartition.VariableIndexPartitioned partitioned =
        (TimePartition.VariableIndexPartitioned) vi;
    builder.addVariables(writeVariableProto(partitioned));
  }

  int unionIdx = 0;
  while (unionIdx < g.timeCoordPartitions.size()) {
    builder.addTimeCoordUnions(
        writeTimeCoordUnionProto(g.timeCoordPartitions.get(unionIdx), unionIdx));
    unionIdx++;
  }

  List<VertCoord> vertCoords = g.vertCoords;
  int vertIdx = 0;
  for (VertCoord vc : vertCoords) {
    builder.addVertCoords(writeCoordProto(vc, vertIdx));
    vertIdx++;
  }

  List<EnsCoord> ensCoords = g.ensCoords;
  int ensIdx = 0;
  for (EnsCoord ec : ensCoords) {
    builder.addEnsCoords(writeCoordProto(ec, ensIdx));
    ensIdx++;
  }

  return builder.build();
}
private boolean createPartitionedTimeCoordinates(TimePartition.Partition canon, Formatter f) throws IOException { List<TimePartition.Partition> partitions = tp.getPartitions(); boolean ok = true; // for each group in canonical Partition for (GribCollection.GroupHcs firstGroup : canon.makeGribCollection(f).getGroups()) { String gname = firstGroup.getId(); if (trace) f.format(" Check Group %s%n", gname); // get list of corresponding groups from all the time partition, so we dont have to keep // looking it up List<PartGroup> pgList = new ArrayList<PartGroup>(partitions.size()); for (TimePartition.Partition dc : partitions) { GribCollection.GroupHcs gg = dc.makeGribCollection(f).findGroupById(gname); if (gg == null) logger.error(" Cant find group {} in partition {}", gname, dc.getName()); else pgList.add(new PartGroup(gg, dc)); } // unique time coordinate unions List<TimeCoordUnion> unionList = new ArrayList<TimeCoordUnion>(); // for each variable in canonical Partition for (GribCollection.VariableIndex viCanon : firstGroup.varIndex) { if (trace) f.format(" Check variable %s%n", viCanon); TimeCoord tcCanon = viCanon.getTimeCoord(); List<TimeCoord> tcPartitions = new ArrayList<TimeCoord>(pgList.size()); // for each partition, get the time index for (PartGroup pg : pgList) { // get corresponding variable GribCollection.VariableIndex vi2 = pg.group.findVariableByHash(viCanon.cdmHash); if (vi2 == null) { // apparently not in the file f.format( " WARN Cant find variable %s in partition %s / %s%n", viCanon, pg.tpp.getName(), pg.group.getId()); tcPartitions.add(null); } else { if (vi2.timeIdx < 0 || vi2.timeIdx >= pg.group.timeCoords.size()) { logger.error(" timeIdx out of range var= {} on partition {}", vi2, pg.tpp.getName()); tcPartitions.add(null); } else { TimeCoord tc2 = vi2.getTimeCoord(); if (tc2.isInterval() != tcCanon.isInterval()) { logger.error( " timeIdx wrong interval type var= {} on partition {}", vi2, pg.tpp.getName()); tcPartitions.add(null); } else { 
tcPartitions.add(tc2); } } } } // union of time coordinates TimeCoordUnion union = new TimeCoordUnion(tcCanon.getCode(), tcPartitions, tcCanon); // store result in the first group viCanon.partTimeCoordIdx = TimeCoordUnion.findUnique(unionList, union); // this merges identical TimeCoordUnion } /* turn TimeIndex into TimeCoord for (int tidx = 0; tidx <unionList.size(); tidx++) { TimeCoordUnion union = unionList.get(tidx); f.format(" %s %d: timeIndexList=", firstGroup.hcs.getName(), tidx); for (int idx : union.) f.format("%d,",idx); f.format("%n"); } */ // store results in first group firstGroup.timeCoordPartitions = unionList; } return ok; }
// consistency check on variables : compare each variable to corresponding one in proto // also set the groupno and partno for each partition private boolean checkPartitions(TimePartition.Partition canon, Formatter f) throws IOException { List<TimePartition.Partition> partitions = tp.getPartitions(); int npart = partitions.size(); boolean ok = true; // for each group in canonical Partition GribCollection canonGc = canon.makeGribCollection(f); for (GribCollection.GroupHcs firstGroup : canonGc.getGroups()) { String gname = firstGroup.getId(); if (trace) f.format(" Check Group %s%n", gname); // hash proto variables for quick lookup Map<Integer, GribCollection.VariableIndex> check = new HashMap<Integer, GribCollection.VariableIndex>(firstGroup.varIndex.size()); List<GribCollection.VariableIndex> varIndexP = new ArrayList<GribCollection.VariableIndex>(firstGroup.varIndex.size()); for (GribCollection.VariableIndex vi : firstGroup.varIndex) { TimePartition.VariableIndexPartitioned vip = tp.makeVariableIndexPartitioned(vi, npart); varIndexP.add(vip); check.put(vi.cdmHash, vip); // replace with its evil twin } firstGroup.varIndex = varIndexP; // replace with its evil twin // for each partition for (int partno = 0; partno < npart; partno++) { TimePartition.Partition tpp = partitions.get(partno); if (trace) f.format(" Check Partition %s%n", tpp.getName()); // get corresponding group GribCollection gc = tpp.makeGribCollection(f); int groupIdx = gc.findGroupIdxById(firstGroup.getId()); if (groupIdx < 0) { f.format(" Cant find group %s in partition %s%n", gname, tpp.getName()); ok = false; continue; } GribCollection.GroupHcs group = gc.getGroup(groupIdx); // for each variable in partition group for (int varIdx = 0; varIdx < group.varIndex.size(); varIdx++) { GribCollection.VariableIndex vi2 = group.varIndex.get(varIdx); if (trace) f.format(" Check variable %s%n", vi2); int flag = 0; GribCollection.VariableIndex vi1 = check.get(vi2.cdmHash); // compare with proto variable if (vi1 
== null) { f.format( " WARN Cant find variable %s from %s in proto - ignoring that variable%n", vi2, tpp.getName()); continue; // we can tolerate this } // compare vert coordinates VertCoord vc1 = vi1.getVertCoord(); VertCoord vc2 = vi2.getVertCoord(); if ((vc1 == null) != (vc2 == null)) { f.format( " ERR Vert coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((vc1 != null) && !vc1.equalsData(vc2)) { f.format( " WARN Vert coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon vc = %s%n", vc1); f.format(" this vc = %s%n", vc2); flag |= TimePartition.VERT_COORDS_DIFFER; } // compare ens coordinates EnsCoord ec1 = vi1.getEnsCoord(); EnsCoord ec2 = vi2.getEnsCoord(); if ((ec1 == null) != (ec2 == null)) { f.format( " ERR Ensemble coordinates existence on variable %s in %s doesnt match%n", vi2, tpp.getName()); ok = false; } else if ((ec1 != null) && !ec1.equalsData(ec2)) { f.format( " WARN Ensemble coordinates values on variable %s in %s dont match%n", vi2, tpp.getName()); f.format(" canon ec = %s%n", ec1); f.format(" this ec = %s%n", ec2); flag |= TimePartition.ENS_COORDS_DIFFER; } ((TimePartition.VariableIndexPartitioned) vi1) .setPartitionIndex(partno, groupIdx, varIdx, flag); } // loop over variable } // loop over partition } // loop over group if (ok) f.format(" Partition check: vert, ens coords OK%n"); return ok; }
/**
 * Rebuilds an ensemble coordinate from its protobuf representation. The stored values are
 * consumed two at a time; each pair, truncated to {@code int}, becomes one
 * {@code EnsCoord.Coord}.
 *
 * <p>NOTE(review): with an odd number of stored values the second read of the last pair would
 * go one past the end - presumably the writer always emits pairs; confirm.
 *
 * @param pc the stored coordinate message
 * @return the ensemble coordinate built from the value pairs
 * @throws IOException declared for symmetry with the other readers; not thrown directly here
 */
private EnsCoord readEnsCoord(GribCollectionProto.Coord pc) throws IOException {
  final int nvalues = pc.getValuesCount();
  List<EnsCoord.Coord> members = new ArrayList<EnsCoord.Coord>(nvalues);

  int pos = 0;
  while (pos < nvalues) {
    int first = (int) pc.getValues(pos);
    int second = (int) pc.getValues(pos + 1);
    members.add(new EnsCoord.Coord(first, second));
    pos += 2;
  }

  return new EnsCoord(members);
}
// read all records in all files, // divide into groups based on GDS hash // each group has an arraylist of all records that belong to it. // for each group, run rectlizer to derive the coordinates and variables public List<Group> makeAggregatedGroups( List<String> filenames, CollectionManager.Force force, Formatter f) throws IOException { Map<Integer, Group> gdsMap = new HashMap<Integer, Group>(); boolean intvMerge = mergeIntvDefault; f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName()); int total = 0; int fileno = 0; for (CollectionManager dcm : collections) { f.format(" dcm= %s%n", dcm); FeatureCollectionConfig.GribConfig config = (FeatureCollectionConfig.GribConfig) dcm.getAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG); Map<Integer, Integer> gdsConvert = (config != null) ? config.gdsHash : null; FeatureCollectionConfig.GribIntvFilter intvMap = (config != null) ? config.intvFilter : null; intvMerge = (config == null) || (config.intvMerge == null) ? mergeIntvDefault : config.intvMerge; for (MFile mfile : dcm.getFiles()) { // f.format("%3d: %s%n", fileno, mfile.getPath()); filenames.add(mfile.getPath()); Grib2Index index = null; try { index = (Grib2Index) GribIndex.readOrCreateIndexFromSingleFile( false, !isSingleFile, mfile, config, force, f); } catch (IOException ioe) { logger.warn( "GribCollectionBuilder {}: reading/Creating gbx9 index failed err={}", gc.getName(), ioe.getMessage()); f.format( "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n skipping %s%n", ioe.getMessage(), mfile.getPath() + GribIndex.IDX_EXT); continue; } for (Grib2Record gr : index.getRecords()) { if (this.tables == null) { Grib2SectionIdentification ids = gr.getId(); // so all records must use the same table (!) 
this.tables = Grib2Customizer.factory( ids.getCenter_id(), ids.getSubcenter_id(), ids.getMaster_table_version(), ids.getLocal_table_version()); if (config != null) tables.setTimeUnitConverter( config .getTimeUnitConverter()); // LOOK doesnt really work with multiple collections } if (intvMap != null && filterTinv(gr, intvMap, f)) continue; // skip gr.setFile(fileno); // each record tracks which file it belongs to int gdsHash = gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records if (gdsConvert != null && gdsConvert.get(gdsHash) != null) // allow external config to muck with gdsHash. Why? because of error in // encoding gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching Group g = gdsMap.get(gdsHash); if (g == null) { g = new Group(gr.getGDSsection(), gdsHash); gdsMap.put(gdsHash, g); } g.records.add(gr); total++; } fileno++; } } f.format(" total grib records= %d%n", total); Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter(); // debugging List<Group> result = new ArrayList<Group>(gdsMap.values()); for (Group g : result) { g.rect = new Grib2Rectilyser(tables, g.records, g.gdsHash, intvMerge); f.format(" GDS hash %d == ", g.gdsHash); g.rect.make(f, c, filenames); } f.format( " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n", c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records); return result; }