Ejemplo n.º 1
0
  /**
   * Builds a {@link CdmGridDataset} describing every gridded variable found at {@code location}.
   *
   * <p>In addition to the plain variables, this method:
   *
   * <ul>
   *   <li>pairs up x/y vector components (recognised from names containing {@code eastward_} /
   *       {@code northward_}, or matching {@code u-*component} / {@code v-*component}) and
   *       registers a {@link VectorPlugin} for every complete pair;
   *   <li>groups NetCDF-U mean/standard-deviation children under their parent variable (the one
   *       carrying an {@code ancillary_variables} attribute) and registers a {@link MeanSDPlugin}
   *       when both a mean and a standard deviation child are found.
   * </ul>
   *
   * <p>The underlying NetCDF dataset is always closed before this method returns.
   *
   * @param id the ID given to the created dataset
   * @param location the location of the data, as accepted by {@code openAndAggregateDataset}
   * @return the newly created dataset with any vector / mean-SD plugins attached
   * @throws IOException if thrown while opening or reading the data
   * @throws EdalException if thrown while opening the data or building the domain objects
   */
  @Override
  public AbstractGridDataset createDataset(String id, String location)
      throws IOException, EdalException {
    NetcdfDataset nc = null;
    try {
      /*
       * Open the dataset, using the cache for NcML aggregations
       */
      nc = openAndAggregateDataset(location);

      /*-
       * We may in future be able to use forecast model run collection aggregations for
       * dealing with the case of overlapping time axes.  To do this the code will look
       * something like this:
       *
       * StringBuilder sb = new StringBuilder();
       * Formatter formatter = new Formatter(sb, Locale.UK);
       * Fmrc f = Fmrc.open(location, formatter);
       *
       * in openAndAggregateDataset.  It will need to build up an NcML document which
       * does this.  It should look something like:
       *
       *  <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" enhance="true">
       *      <aggregation dimName="run" type="forecastModelRunCollection" timeUnitsChange="true">
       *           <!-- scanFmrc actually works, but what we want is something like the following bit -->
       *           <scanFmrc location="/home/guy/Data/POLCOMS_IRISH/" regExp=".*\.nc"/>
       *           <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090320.nc" coordValue="2009-03-20T00:00:00Z" enhance="true" />
       *           <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090321.nc" coordValue="2009-03-21T00:00:00Z" enhance="true" />
       *           <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090322.nc" coordValue="2009-03-22T00:00:00Z" enhance="true" />
       *      </aggregation>
       *  </netcdf>
       *
       * For more documentation see:
       * http://mailman.unidata.ucar.edu/software/thredds/current/netcdf-java/ncml/FmrcAggregation.html
       *
       * We then can do stuff like:
       *
       * ucar.nc2.dt.GridDataset gridDataset = f.getDatasetBest();
       *
       * To get the single best aggregation of the overlapping time axis
       *
       * Then we need to work with GridDatasets in place of NetcdfDatasets.  Stuff like:
       *
       * for(Variable variable : gridDataset.getNetcdfFile().getVariables()) {
       *    // blah blah
       * }
       *
       * will be necessary.  We need to check that this works with remote datasets too
       */

      /*
       * We look for NetCDF-U variables to group mean/standard-deviation.
       *
       * We need to do this here because we want to subsequently ignore
       * parent variables
       */
      Map<String, String[]> varId2AncillaryVars = new HashMap<String, String[]>();
      for (Variable variable : nc.getVariables()) {
        /*
         * Just look for parent variables, since these may not have a
         * grid directly associated with them
         */
        for (Attribute attr : variable.getAttributes()) {
          if (attr.getFullName().equalsIgnoreCase("ancillary_variables")) {
            /*
             * Guard against an attribute with no string value, which would
             * otherwise abort the whole dataset with a
             * NullPointerException. The list is whitespace-separated, so
             * split on any run of whitespace rather than on single spaces.
             */
            String ancillaryVarList = attr.getStringValue();
            if (ancillaryVarList != null) {
              varId2AncillaryVars.put(
                  variable.getFullName(), ancillaryVarList.trim().split("\\s+"));
            }
          }
        }
      }

      ucar.nc2.dt.GridDataset gridDataset = CdmUtils.getGridDataset(nc);
      List<GridVariableMetadata> vars = new ArrayList<GridVariableMetadata>();
      /*
       * Store a map of component names. Key is the compound name, value
       * is a 2-element String array with x, y component IDs
       *
       * Also store a map of whether these components are really
       * eastward/northward, or whether they are locally u/v
       */
      Map<String, String[]> xyComponentPairs = new HashMap<String, String[]>();
      Map<String, Boolean> xyNameToTrueEN = new HashMap<String, Boolean>();
      /*
       * Store a map of variable IDs to UncertML URLs. This will be used
       * to determine which components are mean/std/etc.
       *
       * TODO implement more than just Mean/SD
       */
      Map<String, String> varId2UncertMLRefs = new HashMap<String, String>();
      /*
       * Here we store the parent variable IDs and their corresponding
       * title.
       */
      Map<String, String> parentVarId2Title = new HashMap<String, String>();
      for (Gridset gridset : gridDataset.getGridsets()) {
        /*
         * All grids in a gridset share the same coordinate system, so the
         * domain objects are built once per gridset.
         */
        GridCoordSystem coordSys = gridset.getGeoCoordSystem();
        HorizontalGrid hDomain = CdmUtils.createHorizontalGrid(coordSys);
        VerticalAxis zDomain = CdmUtils.createVerticalAxis(coordSys);
        TimeAxis tDomain = CdmUtils.createTimeAxis(coordSys);

        /*
         * Create a VariableMetadata object for each GridDatatype
         */
        for (GridDatatype grid : gridset.getGrids()) {
          VariableDS variable = grid.getVariable();
          String varId = variable.getFullName();
          String name = getVariableName(variable);

          /*
           * If this is a parent variable for a stats collection, we
           * don't want it to be a normal variable as well.
           */
          if (varId2AncillaryVars.containsKey(varId)) {
            parentVarId2Title.put(varId, name);
            continue;
          }

          /*
           * If it is a child variable that is (potentially) referenced
           * by UncertML, store its ID and the (possible) UncertML URI
           */
          for (Attribute attr : variable.getAttributes()) {
            if (attr.getFullName().equalsIgnoreCase("ref")) {
              varId2UncertMLRefs.put(varId, attr.getStringValue());
            }
          }

          Parameter parameter =
              new Parameter(
                  varId,
                  variable.getShortName(),
                  variable.getDescription(),
                  variable.getUnitsString(),
                  name);
          vars.add(new GridVariableMetadata(varId, parameter, hDomain, zDomain, tDomain, true));

          if (name != null) {
            /*
             * Check for vector components. Index 0 is the x component,
             * index 1 the y component; the boolean records whether the
             * pair is truly eastward/northward (true) or locally u/v
             * (false).
             */
            if (name.contains("eastward_")) {
              recordVectorComponent(
                  xyComponentPairs,
                  xyNameToTrueEN,
                  name.replaceFirst("eastward_", ""),
                  0,
                  varId,
                  true);
            } else if (name.contains("northward_")) {
              recordVectorComponent(
                  xyComponentPairs,
                  xyNameToTrueEN,
                  name.replaceFirst("northward_", ""),
                  1,
                  varId,
                  true);
            } else if (name.matches("u-.*component")) {
              recordVectorComponent(
                  xyComponentPairs,
                  xyNameToTrueEN,
                  name.replaceFirst("u-(.*)component", "$1"),
                  0,
                  varId,
                  false);
            } else if (name.matches("v-.*component")) {
              recordVectorComponent(
                  xyComponentPairs,
                  xyNameToTrueEN,
                  name.replaceFirst("v-(.*)component", "$1"),
                  1,
                  varId,
                  false);
            }
            /*
             * We could potentially add a check for zonal/meridional
             * here if required.
             */
          }
        }
      }

      CdmGridDataset cdmGridDataset =
          new CdmGridDataset(id, location, vars, CdmUtils.getOptimumDataReadingStrategy(nc));

      /*
       * Only register a vector plugin when both components were found
       */
      for (Entry<String, String[]> componentData : xyComponentPairs.entrySet()) {
        String commonName = componentData.getKey();
        String[] comps = componentData.getValue();
        if (comps[0] != null && comps[1] != null) {
          cdmGridDataset.addVariablePlugin(
              new VectorPlugin(comps[0], comps[1], commonName, xyNameToTrueEN.get(commonName)));
        }
      }

      /*
       * For each stats collection, find the children flagged (via their
       * "ref" attribute) as the mean and the standard deviation. A plugin
       * is only registered when both are present.
       */
      for (Entry<String, String[]> statsCollection : varId2AncillaryVars.entrySet()) {
        String statsCollectionId = statsCollection.getKey();
        String meanId = null;
        String stddevId = null;
        for (String statsVarId : statsCollection.getValue()) {
          String uncertRef = varId2UncertMLRefs.get(statsVarId);
          if (uncertRef == null) {
            continue;
          }
          if (uncertRef.equalsIgnoreCase("http://www.uncertml.org/statistics/mean")) {
            meanId = statsVarId;
          }
          if (uncertRef.equalsIgnoreCase(
              "http://www.uncertml.org/statistics/standard-deviation")) {
            stddevId = statsVarId;
          }
        }
        if (meanId != null && stddevId != null) {
          /*
           * The title is null if the parent variable was not itself
           * encountered as a grid in the loop above.
           */
          MeanSDPlugin meanSDPlugin =
              new MeanSDPlugin(meanId, stddevId, parentVarId2Title.get(statsCollectionId));
          cdmGridDataset.addVariablePlugin(meanSDPlugin);
        }
      }

      return cdmGridDataset;
    } finally {
      CdmUtils.closeDataset(nc);
    }
  }

  /**
   * Records {@code varId} as one component of the vector quantity named {@code compoundName}.
   *
   * <p>The first component seen for a compound name creates the 2-element pair and fixes its
   * true-east/north flag; later components only fill in their slot, so a name that is seen
   * again overwrites the earlier variable ID for that slot.
   *
   * @param xyComponentPairs map of compound name to {x, y} component variable IDs (updated)
   * @param xyNameToTrueEN map of compound name to its true-east/north flag (updated)
   * @param compoundName the name shared by both components
   * @param componentIndex 0 for the x component, 1 for the y component
   * @param varId the ID of the component variable
   * @param trueEastNorth flag stored when the pair is first created
   */
  private void recordVectorComponent(
      Map<String, String[]> xyComponentPairs,
      Map<String, Boolean> xyNameToTrueEN,
      String compoundName,
      int componentIndex,
      String varId,
      boolean trueEastNorth) {
    String[] components = xyComponentPairs.get(compoundName);
    if (components == null) {
      components = new String[2];
      xyComponentPairs.put(compoundName, components);
      xyNameToTrueEN.put(compoundName, trueEastNorth);
    }
    components[componentIndex] = varId;
  }
Ejemplo n.º 2
0
  /**
   * Opens the NetCDF dataset at the given location, aggregating along the time dimension when
   * {@code location} is a glob expression that matches several local files.
   *
   * <p>Locations starting with {@code dods://} or {@code http://} are treated as remote and are
   * opened directly. Any other location is expanded as a glob expression: a single match is
   * opened directly, while multiple matches are sorted by the first value of their time
   * coordinate and joined with a generated NcML {@code joinExisting} aggregation. The generated
   * NcML is kept in {@code ncmlString} and reused on later calls.
   *
   * <p>Design note carried over from the original documentation: the dataset cache is intended
   * to be used only for NcML aggregations. It should not be used for OPeNDAP or single NetCDF
   * files because the underlying data may have changed and the NetcdfDataset cache may cache a
   * dataset forever. For NcML we rely on server administrators having set a "recheckEvery"
   * parameter for aggregations that may change with time. Aggregations can be time-consuming to
   * assemble and we don't want to do this every time a map is drawn. NOTE(review): no caching is
   * visible in this method itself; presumably it happens inside {@code CdmUtils.openDataset} -
   * confirm.
   *
   * <p>NOTE(review): {@code ncmlString} is not keyed by {@code location}, so once set it is
   * reused for every later multi-file call on this object - confirm that each instance only ever
   * serves one location.
   *
   * @param location The location of the data: a local NetCDF file, an NcML aggregation file or an
   *     OPeNDAP location, {@literal i.e.} anything that can be passed to
   *     NetcdfDataset.openDataset(location).
   * @return a {@link NetcdfDataset} object for accessing the data at the given location.
   * @throws IOException if there was an error reading from the data source.
   * @throws EdalException if the location matches no files, or if multiple files are matched but
   *     no time dimension could be found to join them on.
   */
  private NetcdfDataset openAndAggregateDataset(String location) throws IOException, EdalException {
    /*
     * We have a remote dataset
     *
     * NOTE(review): "https://" locations fall through to the local-file
     * branch below - confirm whether that is intended.
     */
    if (location.startsWith("dods://") || location.startsWith("http://")) {
      return CdmUtils.openDataset(location);
    }

    /*
     * We have a local dataset
     */
    List<File> files;
    try {
      files = CdmUtils.expandGlobExpression(location);
    } catch (NullPointerException e) {
      /*
       * Log the offending location for diagnosis, then propagate
       */
      log.error("NPE processing location: " + location, e);
      throw e;
    }
    if (files.isEmpty()) {
      throw new EdalException(
          "The location " + location + " doesn't refer to any existing files.");
    }
    if (files.size() == 1) {
      return CdmUtils.openDataset(files.get(0).getAbsolutePath());
    }

    /*
     * We have multiple files in a glob expression. We write some NcML and
     * use the NetCDF aggregation libs to parse this into an aggregated
     * dataset.
     *
     * If we have already generated the ncML on a previous call, just use
     * that.
     */
    if (ncmlString == null) {
      /*
       * Find the name of the time dimension
       */
      String timeDimName = null;
      NetcdfDataset first = openAndAggregateDataset(files.get(0).getAbsolutePath());
      try {
        for (Variable var : first.getVariables()) {
          if (!var.isCoordinateVariable()) {
            continue;
          }
          for (Attribute attr : var.getAttributes()) {
            if (attr.getFullName().equalsIgnoreCase("units")
                && attr.getStringValue().contains(" since ")) {
              /*
               * This is the time dimension. Since this is a
               * co-ordinate variable, there is only 1 dimension
               */
              Dimension timeDimension = var.getDimension(0);
              timeDimName = timeDimension.getFullName();
            }
          }
        }
      } finally {
        /*
         * Always release the probe dataset, even if inspecting its
         * variables failed
         */
        first.close();
      }
      if (timeDimName == null) {
        throw new EdalException("Cannot join multiple files without time dimensions");
      }
      /*
       * We can't assume that the glob expression will have returned the
       * files in time order.
       *
       * We could assume that alphabetical == time ordered (and for
       * properly named files it will - but let's not rely on our users
       * having sensible naming conventions...
       *
       * Sort the list using a comparator which opens the file and gets
       * the first value of the time dimension
       */
      final String aggDimName = timeDimName;
      Collections.sort(
          files,
          new Comparator<File>() {
            @Override
            public int compare(File ncFile1, File ncFile2) {
              NetcdfFile nc1 = null;
              NetcdfFile nc2 = null;
              try {
                nc1 = NetcdfFile.open(ncFile1.getAbsolutePath());
                nc2 = NetcdfFile.open(ncFile2.getAbsolutePath());
                Variable timeVar1 = nc1.findVariable(aggDimName);
                Variable timeVar2 = nc2.findVariable(aggDimName);
                long time1 = timeVar1.read().getLong(0);
                long time2 = timeVar2.read().getLong(0);
                return Long.compare(time1, time2);
              } catch (Exception e) {
                /*
                 * There was a problem reading the data. Sort
                 * alphanumerically by filename and hope for the
                 * best...
                 *
                 * This catches all exceptions (including a missing
                 * time variable) because however it fails this is
                 * still our best option.
                 *
                 * If the error is a genuine problem, it'll show up as
                 * soon as we try and aggregate.
                 */
                return ncFile1.getAbsolutePath().compareTo(ncFile2.getAbsolutePath());
              } finally {
                if (nc1 != null) {
                  try {
                    nc1.close();
                  } catch (IOException e) {
                    log.error("Problem closing netcdf file", e);
                  }
                }
                if (nc2 != null) {
                  try {
                    nc2.close();
                  } catch (IOException e) {
                    log.error("Problem closing netcdf file", e);
                  }
                }
              }
            }
          });

      /*
       * Now create the NcML string and use it to create an aggregated
       * dataset. Attribute values are escaped so that paths or dimension
       * names containing XML special characters still give well-formed
       * NcML.
       */
      StringBuilder ncml = new StringBuilder();
      ncml.append("<netcdf xmlns=\"http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2\">");
      ncml.append("<aggregation dimName=\"")
          .append(escapeNcmlAttribute(timeDimName))
          .append("\" type=\"joinExisting\">");
      for (File file : files) {
        ncml.append("<netcdf location=\"")
            .append(escapeNcmlAttribute(file.getAbsolutePath()))
            .append("\"/>");
      }
      ncml.append("</aggregation>");
      ncml.append("</netcdf>");

      ncmlString = ncml.toString();
    }
    return NcMLReader.readNcML(new StringReader(ncmlString), null);
  }

  /**
   * Escapes the characters that are not allowed verbatim inside a double-quoted XML attribute
   * value.
   *
   * @param value the raw attribute value
   * @return {@code value} with {@code &}, {@code <}, {@code >} and {@code "} replaced by their
   *     XML entities
   */
  private String escapeNcmlAttribute(String value) {
    StringBuilder escaped = new StringBuilder(value.length());
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        default:
          escaped.append(c);
          break;
      }
    }
    return escaped.toString();
  }