Пример #1
0
  // read or create index
  private void readOrCreateIndex(CollectionManager.Force ff, Formatter f) throws IOException {

    // force new index or test for new index needed
    boolean force =
        ((ff == CollectionManager.Force.always)
            || (ff == CollectionManager.Force.test && needsUpdate()));

    // otherwise, we're good as long as the index file exists
    File idx = gc.getIndexFile();
    if (force || !idx.exists() || !readIndex(idx.getPath())) {
      logger.info("GribCollection {}: createIndex {}", gc.getName(), idx.getPath());
      createIndex(idx, ff, f); // write out index
      gc.rafLocation = idx.getPath();
      gc.setRaf(new RandomAccessFile(idx.getPath(), "r"));
      readIndex(gc.getRaf()); // read back in index
    }
  }
 private boolean needsUpdate(long collectionLastModified, Formatter f) throws IOException {
   CollectionManager.ChangeChecker cc = Grib1Index.getChangeChecker();
   for (CollectionManager dcm :
       tpc.makePartitions()) { // LOOK not really right, since we dont know if these files are the
     // same as in the index
     File idxFile = GribCollection.getIndexFile(dcm);
     if (!idxFile.exists()) return true;
     if (collectionLastModified < idxFile.lastModified()) return true;
     for (MFile mfile : dcm.getFiles()) {
       if (cc.hasChangedSince(mfile, idxFile.lastModified())) return true;
     }
   }
   return false;
 }
  protected GribCollection.VariableIndex readVariable(
      GribCollectionProto.Variable pv, GribCollection.GroupHcs group) {
    int discipline = pv.getDiscipline();
    int category = pv.getCategory();
    int param = pv.getParameter();
    int levelType = pv.getLevelType();
    int intvType = pv.getIntervalType();
    String intvName = pv.getIntvName();
    boolean isLayer = pv.getIsLayer();
    int ensDerivedType = pv.getEnsDerivedType();
    int probType = pv.getProbabilityType();
    String probabilityName = pv.getProbabilityName();
    int cdmHash = pv.getCdmHash();
    long recordsPos = pv.getRecordsPos();
    int recordsLen = pv.getRecordsLen();
    int timeIdx = pv.getTimeIdx();
    int vertIdx = pv.getVertIdx();
    int ensIdx = pv.getEnsIdx();
    int tableVersion = pv.getTableVersion();

    return gc.makeVariableIndex(
        group,
        tableVersion,
        discipline,
        category,
        param,
        levelType,
        isLayer,
        intvType,
        intvName,
        ensDerivedType,
        probType,
        probabilityName,
        cdmHash,
        timeIdx,
        vertIdx,
        ensIdx,
        recordsPos,
        recordsLen);
  }
  /*
  MAGIC_START
  version
  sizeRecords
  VariableRecords (sizeRecords bytes)
  sizeIndex
  GribCollectionIndex (sizeIndex bytes)
  */
  private boolean writeIndex(TimePartition.Partition canon, Formatter f) throws IOException {
    File file = tp.getIndexFile();
    if (file.exists()) {
      if (!file.delete()) logger.error("Cant delete " + file.getPath());
    }

    RandomAccessFile raf = new RandomAccessFile(file.getPath(), "rw");
    raf.order(RandomAccessFile.BIG_ENDIAN);
    try {
      //// header message
      raf.write(MAGIC_START.getBytes("UTF-8"));
      raf.writeInt(version);
      raf.writeLong(0); // no record section

      GribCollectionProto.GribCollectionIndex.Builder indexBuilder =
          GribCollectionProto.GribCollectionIndex.newBuilder();
      indexBuilder.setName(tp.getName());

      GribCollection canonGc = canon.makeGribCollection(f);
      for (GribCollection.GroupHcs g : canonGc.getGroups())
        indexBuilder.addGroups(writeGroupProto(g));

      indexBuilder.setCenter(canonGc.getCenter());
      indexBuilder.setSubcenter(canonGc.getSubcenter());
      indexBuilder.setMaster(canonGc.getMaster());
      indexBuilder.setLocal(canonGc.getLocal());

      for (TimePartition.Partition p : tp.getPartitions()) {
        indexBuilder.addPartitions(writePartitionProto(p.getName(), (TimePartition.Partition) p));
      }

      GribCollectionProto.GribCollectionIndex index = indexBuilder.build();
      byte[] b = index.toByteArray();
      NcStream.writeVInt(raf, b.length); // message size
      raf.write(b); // message  - all in one gulp
      f.format("GribCollectionTimePartitionedIndex= %d bytes%n", b.length);

    } finally {
      f.format("file size =  %d bytes%n", raf.length());
      raf.close();
    }

    return true;
  }
  // consistency check on variables : compare each variable to corresponding one in proto
  // also set the groupno and partno for each partition
  private boolean checkPartitions(TimePartition.Partition canon, Formatter f) throws IOException {
    List<TimePartition.Partition> partitions = tp.getPartitions();
    int npart = partitions.size();
    boolean ok = true;

    // for each group in canonical Partition
    GribCollection canonGc = canon.makeGribCollection(f);
    for (GribCollection.GroupHcs firstGroup : canonGc.getGroups()) {
      String gname = firstGroup.getId();
      if (trace) f.format(" Check Group %s%n", gname);

      // hash proto variables for quick lookup
      Map<Integer, GribCollection.VariableIndex> check =
          new HashMap<Integer, GribCollection.VariableIndex>(firstGroup.varIndex.size());
      List<GribCollection.VariableIndex> varIndexP =
          new ArrayList<GribCollection.VariableIndex>(firstGroup.varIndex.size());
      for (GribCollection.VariableIndex vi : firstGroup.varIndex) {
        TimePartition.VariableIndexPartitioned vip = tp.makeVariableIndexPartitioned(vi, npart);
        varIndexP.add(vip);
        check.put(vi.cdmHash, vip); // replace with its evil twin
      }
      firstGroup.varIndex = varIndexP; // replace with its evil twin

      // for each partition
      for (int partno = 0; partno < npart; partno++) {
        TimePartition.Partition tpp = partitions.get(partno);
        if (trace) f.format(" Check Partition %s%n", tpp.getName());

        // get corresponding group
        GribCollection gc = tpp.makeGribCollection(f);
        int groupIdx = gc.findGroupIdxById(firstGroup.getId());
        if (groupIdx < 0) {
          f.format(" Cant find group %s in partition %s%n", gname, tpp.getName());
          ok = false;
          continue;
        }
        GribCollection.GroupHcs group = gc.getGroup(groupIdx);

        // for each variable in partition group
        for (int varIdx = 0; varIdx < group.varIndex.size(); varIdx++) {
          GribCollection.VariableIndex vi2 = group.varIndex.get(varIdx);
          if (trace) f.format(" Check variable %s%n", vi2);
          int flag = 0;

          GribCollection.VariableIndex vi1 = check.get(vi2.cdmHash); // compare with proto variable
          if (vi1 == null) {
            f.format(
                "   WARN Cant find variable %s from %s in proto - ignoring that variable%n",
                vi2, tpp.getName());
            continue; // we can tolerate this
          }

          // compare vert coordinates
          VertCoord vc1 = vi1.getVertCoord();
          VertCoord vc2 = vi2.getVertCoord();
          if ((vc1 == null) != (vc2 == null)) {
            f.format(
                "   ERR Vert coordinates existence on variable %s in %s doesnt match%n",
                vi2, tpp.getName());
            ok = false;
          } else if ((vc1 != null) && !vc1.equalsData(vc2)) {
            f.format(
                "   WARN Vert coordinates values on variable %s in %s dont match%n",
                vi2, tpp.getName());
            f.format("    canon vc = %s%n", vc1);
            f.format("    this vc = %s%n", vc2);
            flag |= TimePartition.VERT_COORDS_DIFFER;
          }

          // compare ens coordinates
          EnsCoord ec1 = vi1.getEnsCoord();
          EnsCoord ec2 = vi2.getEnsCoord();
          if ((ec1 == null) != (ec2 == null)) {
            f.format(
                "   ERR Ensemble coordinates existence on variable %s in %s doesnt match%n",
                vi2, tpp.getName());
            ok = false;
          } else if ((ec1 != null) && !ec1.equalsData(ec2)) {
            f.format(
                "   WARN Ensemble coordinates values on variable %s in %s dont match%n",
                vi2, tpp.getName());
            f.format("    canon ec = %s%n", ec1);
            f.format("    this ec = %s%n", ec2);
            flag |= TimePartition.ENS_COORDS_DIFFER;
          }

          ((TimePartition.VariableIndexPartitioned) vi1)
              .setPartitionIndex(partno, groupIdx, varIdx, flag);
        } // loop over variable
      } // loop over partition
    } // loop over group

    if (ok) f.format("  Partition check: vert, ens coords OK%n");
    return ok;
  }
Пример #6
0
  private void createIndex(
      File indexFile, List<Group> groups, ArrayList<String> filenames, Formatter f)
      throws IOException {
    Grib2Record first = null; // take global metadata from here

    if (indexFile.exists()) indexFile.delete(); // replace it
    f.format(" createIndex for %s%n", indexFile.getPath());

    RandomAccessFile raf = new RandomAccessFile(indexFile.getPath(), "rw");
    raf.order(RandomAccessFile.BIG_ENDIAN);
    try {
      //// header message
      raf.write(MAGIC_START.getBytes("UTF-8"));
      raf.writeInt(version);
      long lenPos = raf.getFilePointer();
      raf.writeLong(0); // save space to write the length of the record section
      long countBytes = 0;
      int countRecords = 0;
      for (Group g : groups) {
        g.fileSet = new HashSet<Integer>();
        for (Grib2Rectilyser.VariableBag vb : g.rect.getGribvars()) {
          if (first == null) first = vb.first;
          GribCollectionProto.VariableRecords vr = writeRecordsProto(vb, g.fileSet);
          byte[] b = vr.toByteArray();
          vb.pos = raf.getFilePointer();
          vb.length = b.length;
          raf.write(b);
          countBytes += b.length;
          countRecords += vb.recordMap.length;
        }
      }
      long bytesPerRecord = countBytes / ((countRecords == 0) ? 1 : countRecords);
      f.format(
          "  write RecordMaps: bytes = %d record = %d bytesPerRecord=%d%n",
          countBytes, countRecords, bytesPerRecord);

      if (first == null) {
        logger.error("GribCollection {}: has no files\n{}", gc.getName(), f.toString());
        throw new IllegalArgumentException("GribCollection " + gc.getName() + " has no files");
      }

      long pos = raf.getFilePointer();
      raf.seek(lenPos);
      raf.writeLong(countBytes);
      raf.seek(pos); // back to the output.

      GribCollectionProto.GribCollectionIndex.Builder indexBuilder =
          GribCollectionProto.GribCollectionIndex.newBuilder();
      indexBuilder.setName(gc.getName());

      for (String fn : filenames) indexBuilder.addFiles(fn);

      for (Group g : groups) indexBuilder.addGroups(writeGroupProto(g));

      /* int count = 0;
      for (DatasetCollectionManager dcm : collections) {
        indexBuilder.addParams(makeParamProto(new Parameter("spec" + count, dcm.())));
        count++;
      } */

      // what about just storing first ??
      Grib2SectionIdentification ids = first.getId();
      indexBuilder.setCenter(ids.getCenter_id());
      indexBuilder.setSubcenter(ids.getSubcenter_id());
      indexBuilder.setMaster(ids.getMaster_table_version());
      indexBuilder.setLocal(ids.getLocal_table_version());

      Grib2Pds pds = first.getPDS();
      indexBuilder.setGenProcessType(pds.getGenProcessType());
      indexBuilder.setGenProcessId(pds.getGenProcessId());
      indexBuilder.setBackProcessId(pds.getBackProcessId());

      GribCollectionProto.GribCollectionIndex index = indexBuilder.build();
      byte[] b = index.toByteArray();
      NcStream.writeVInt(raf, b.length); // message size
      raf.write(b); // message  - all in one gulp
      f.format("  write GribCollectionIndex= %d bytes%n", b.length);

    } finally {
      f.format("  file size =  %d bytes%n", raf.length());
      raf.close();
      if (raf != null) raf.close();
    }
  }
Пример #7
0
  // read all records in all files,
  // divide into groups based on GDS hash
  // each group has an arraylist of all records that belong to it.
  // for each group, run rectlizer to derive the coordinates and variables
  public List<Group> makeAggregatedGroups(
      ArrayList<String> filenames, CollectionManager.Force force, Formatter f) throws IOException {
    Map<Integer, Group> gdsMap = new HashMap<Integer, Group>();

    f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName());
    int total = 0;
    int fileno = 0;
    for (CollectionManager dcm : collections) {
      // dcm.scanIfNeeded(); // LOOK ??
      f.format(" dcm= %s%n", dcm);
      Map<Integer, Integer> gdsConvert = (Map<Integer, Integer>) dcm.getAuxInfo("gdsHash");

      for (MFile mfile : dcm.getFiles()) {
        // f.format("%3d: %s%n", fileno, mfile.getPath());
        filenames.add(mfile.getPath());

        Grib2Index index = new Grib2Index();
        try {
          if (!index.readIndex(
              mfile.getPath(),
              mfile.getLastModified(),
              force)) { // heres where the index date is checked against the data file
            index.makeIndex(mfile.getPath(), f);
            f.format(
                "  Index written: %s == %d records %n",
                mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
          } else if (debug) {
            f.format(
                "  Index read: %s == %d records %n",
                mfile.getName() + Grib2Index.IDX_EXT, index.getRecords().size());
          }
        } catch (IOException ioe) {
          f.format(
              "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n  skipping %s%n",
              ioe.getMessage(), mfile.getPath() + Grib2Index.IDX_EXT);
          continue;
        }

        for (Grib2Record gr : index.getRecords()) {
          gr.setFile(fileno); // each record tracks which file it belongs to
          int gdsHash =
              gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records
          if (gdsConvert != null
              && gdsConvert.get(gdsHash)
                  != null) { // allow external config to muck with gdsHash. Why? because of error in
            // encoding
            gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching
          }

          Group g = gdsMap.get(gdsHash);
          if (g == null) {
            g = new Group(gr.getGDSsection(), gdsHash);
            gdsMap.put(gdsHash, g);
          }
          g.records.add(gr);
          total++;
        }
        fileno++;
      }
    }
    f.format(" total grib records= %d%n", total);

    Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter();
    List<Group> result = new ArrayList<Group>(gdsMap.values());
    for (Group g : result) {
      g.rect = new Grib2Rectilyser(g.records, g.gdsHash);
      f.format(" GDS hash %d == ", g.gdsHash);
      g.rect.make(f, c);
    }
    f.format(
        " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n",
        c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records);

    return result;
  }
Пример #8
0
  public boolean readIndex(RandomAccessFile raf) {
    gc.setRaf(raf); // LOOK leaving the raf open in the GribCollection
    try {
      raf.order(RandomAccessFile.BIG_ENDIAN);
      raf.seek(0);

      //// header message
      if (!NcStream.readAndTest(raf, MAGIC_START.getBytes())) {
        logger.error("GribCollection {}: invalid index", gc.getName());
        return false;
      }

      int v = raf.readInt();
      if (v != getVersion()) {
        logger.warn(
            "GribCollection {}: index found version={}, want version= {} on file {}",
            new Object[] {gc.getName(), v, version, raf.getLocation()});
        return false;
      }

      long skip = raf.readLong();
      raf.skipBytes(skip);

      int size = NcStream.readVInt(raf);
      if ((size < 0) || (size > 100 * 1000 * 1000)) {
        logger.warn("GribCollection {}: invalid index ", gc.getName());
        return false;
      }

      byte[] m = new byte[size];
      raf.readFully(m);

      GribCollectionProto.GribCollectionIndex proto =
          GribCollectionProto.GribCollectionIndex.parseFrom(m);

      gc.center = proto.getCenter();
      gc.subcenter = proto.getSubcenter();
      gc.master = proto.getMaster();
      gc.local = proto.getLocal();
      gc.genProcessType = proto.getGenProcessType();
      gc.genProcessId = proto.getGenProcessId();
      gc.backProcessId = proto.getBackProcessId();
      gc.local = proto.getLocal();
      // gc.tables = Grib2Tables.factory(gc.center, gc.subcenter, gc.master, gc.local);

      gc.filenames = new ArrayList<String>(proto.getFilesCount());
      for (int i = 0; i < proto.getFilesCount(); i++) gc.filenames.add(proto.getFiles(i));

      // error condition on a GribCollection Index
      if ((proto.getFilesCount() == 0) && !(this instanceof TimePartitionBuilder)) {
        logger.warn("GribCollection {}: has no files, force recreate ", gc.getName());
        return false;
      }

      gc.groups = new ArrayList<GribCollection.GroupHcs>(proto.getGroupsCount());
      for (int i = 0; i < proto.getGroupsCount(); i++)
        gc.groups.add(readGroup(proto.getGroups(i), gc.makeGroup()));
      Collections.sort(gc.groups);

      gc.params = new ArrayList<Parameter>(proto.getParamsCount());
      for (int i = 0; i < proto.getParamsCount(); i++) gc.params.add(readParam(proto.getParams(i)));

      if (!readPartitions(proto)) {
        logger.warn("TimePartition {}: has no partitions, force recreate ", gc.getName());
        return false;
      }

      return true;

    } catch (Throwable t) {
      logger.error("Error reading index " + raf.getLocation(), t);
      return false;
    }
  }
Пример #9
0
 public boolean needsUpdate() {
   File idx = gc.getIndexFile();
   return !idx.exists() || needsUpdate(idx.lastModified());
 }
  // read all records in all files,
  // divide into groups based on GDS hash
  // each group has an arraylist of all records that belong to it.
  // for each group, run rectlizer to derive the coordinates and variables
  public List<Group> makeAggregatedGroups(
      List<String> filenames, CollectionManager.Force force, Formatter f) throws IOException {
    Map<Integer, Group> gdsMap = new HashMap<Integer, Group>();
    boolean intvMerge = mergeIntvDefault;

    f.format("GribCollection %s: makeAggregatedGroups%n", gc.getName());
    int total = 0;
    int fileno = 0;

    for (CollectionManager dcm : collections) {
      f.format(" dcm= %s%n", dcm);
      FeatureCollectionConfig.GribConfig config =
          (FeatureCollectionConfig.GribConfig)
              dcm.getAuxInfo(FeatureCollectionConfig.AUX_GRIB_CONFIG);
      Map<Integer, Integer> gdsConvert = (config != null) ? config.gdsHash : null;
      FeatureCollectionConfig.GribIntvFilter intvMap = (config != null) ? config.intvFilter : null;
      intvMerge =
          (config == null) || (config.intvMerge == null) ? mergeIntvDefault : config.intvMerge;

      for (MFile mfile : dcm.getFiles()) {
        // f.format("%3d: %s%n", fileno, mfile.getPath());
        filenames.add(mfile.getPath());

        Grib2Index index = null;
        try {
          index =
              (Grib2Index)
                  GribIndex.readOrCreateIndexFromSingleFile(
                      false, !isSingleFile, mfile, config, force, f);

        } catch (IOException ioe) {
          logger.warn(
              "GribCollectionBuilder {}: reading/Creating gbx9 index failed err={}",
              gc.getName(),
              ioe.getMessage());
          f.format(
              "GribCollectionBuilder: reading/Creating gbx9 index failed err=%s%n  skipping %s%n",
              ioe.getMessage(), mfile.getPath() + GribIndex.IDX_EXT);
          continue;
        }

        for (Grib2Record gr : index.getRecords()) {
          if (this.tables == null) {
            Grib2SectionIdentification ids =
                gr.getId(); // so all records must use the same table (!)
            this.tables =
                Grib2Customizer.factory(
                    ids.getCenter_id(),
                    ids.getSubcenter_id(),
                    ids.getMaster_table_version(),
                    ids.getLocal_table_version());
            if (config != null)
              tables.setTimeUnitConverter(
                  config
                      .getTimeUnitConverter()); // LOOK doesnt really work with multiple collections
          }
          if (intvMap != null && filterTinv(gr, intvMap, f)) continue; // skip

          gr.setFile(fileno); // each record tracks which file it belongs to
          int gdsHash =
              gr.getGDSsection().getGDS().hashCode(); // use GDS hash code to group records
          if (gdsConvert != null
              && gdsConvert.get(gdsHash)
                  != null) // allow external config to muck with gdsHash. Why? because of error in
            // encoding
            gdsHash = (Integer) gdsConvert.get(gdsHash); // and we need exact hash matching

          Group g = gdsMap.get(gdsHash);
          if (g == null) {
            g = new Group(gr.getGDSsection(), gdsHash);
            gdsMap.put(gdsHash, g);
          }
          g.records.add(gr);
          total++;
        }
        fileno++;
      }
    }
    f.format(" total grib records= %d%n", total);

    Grib2Rectilyser.Counter c = new Grib2Rectilyser.Counter(); // debugging
    List<Group> result = new ArrayList<Group>(gdsMap.values());
    for (Group g : result) {
      g.rect = new Grib2Rectilyser(tables, g.records, g.gdsHash, intvMerge);
      f.format(" GDS hash %d == ", g.gdsHash);
      g.rect.make(f, c, filenames);
    }
    f.format(
        " Rectilyser: nvars=%d records unique=%d total=%d dups=%d (%f) %n",
        c.vars, c.recordsUnique, c.records, c.dups, ((float) c.dups) / c.records);

    return result;
  }