/**
 * Creates a gridded dataset from the NetCDF data at the given location.
 *
 * <p>The data is opened via {@code openAndAggregateDataset}, one {@link GridVariableMetadata}
 * is created for every grid that is not a statistics "parent" variable, and plugins are then
 * registered on the resulting dataset for:
 *
 * <ul>
 *   <li>vector quantities, where both an x- and a y-component were recognised by name;
 *   <li>mean/standard-deviation pairs, where a parent variable lists children through an
 *       {@code ancillary_variables} attribute and the children carry UncertML {@code ref}
 *       attributes.
 * </ul>
 *
 * <p>The underlying NetCDF dataset is closed before this method returns, whether or not it
 * succeeds.
 *
 * @param id the ID to give to the created dataset
 * @param location the location of the data, as accepted by {@code openAndAggregateDataset}
 * @return the newly-created dataset
 * @throws IOException if there is a problem reading the underlying data
 * @throws EdalException if the dataset cannot be opened or its metadata cannot be created
 */
@Override
public AbstractGridDataset createDataset(String id, String location)
    throws IOException, EdalException {
  NetcdfDataset nc = null;
  try {
    /*
     * Open the dataset, using the cache for NcML aggregations
     */
    nc = openAndAggregateDataset(location);

    /*-
     * We may in future be able to use forecast model run collection aggregations for
     * dealing with the case of overlapping time axes. To do this the code will look
     * something like this:
     *
     * StringBuilder sb = new StringBuilder();
     * Formatter formatter = new Formatter(sb, Locale.UK);
     * Fmrc f = Fmrc.open(location, formatter);
     *
     * in openAndAggregateDataset. It will need to build up an NcML document which
     * does this. It should look something like:
     *
     * <netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" enhance="true">
     *   <aggregation dimName="run" type="forecastModelRunCollection" timeUnitsChange="true">
     *     <!-- scanFmrc actually works, but what we want is something like the following bit -->
     *     <scanFmrc location="/home/guy/Data/POLCOMS_IRISH/" regExp=".*\.nc"/>
     *     <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090320.nc" coordValue="2009-03-20T00:00:00Z" enhance="true" />
     *     <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090321.nc" coordValue="2009-03-21T00:00:00Z" enhance="true" />
     *     <netcdf location="/home/guy/Data/POLCOMS_IRISH/polcoms_irish_hourly_20090322.nc" coordValue="2009-03-22T00:00:00Z" enhance="true" />
     *   </aggregation>
     * </netcdf>
     *
     * For more documentation see:
     * http://mailman.unidata.ucar.edu/software/thredds/current/netcdf-java/ncml/FmrcAggregation.html
     *
     * We then can do stuff like:
     *
     * ucar.nc2.dt.GridDataset gridDataset = f.getDatasetBest();
     *
     * To get the single best aggregation of the overlapping time axis
     *
     * Then we need to work with GridDatasets in place of NetcdfDatasets. Stuff like:
     *
     * for(Variable variable : gridDataset.getNetcdfFile().getVariables()) {
     *    // blah blah
     * }
     *
     * will be necessary. We need to check that that works with remote datasets too
     */

    /*
     * We look for NetCDF-U variables to group mean/standard-deviation.
     *
     * We need to do this here because we want to subsequently ignore
     * parent variables
     */
    Map<String, String[]> varId2AncillaryVars = new HashMap<>();
    for (Variable variable : nc.getVariables()) {
      /*
       * Just look for parent variables, since these may not have a
       * grid directly associated with them
       */
      for (Attribute attr : variable.getAttributes()) {
        if (attr.getFullName().equalsIgnoreCase("ancillary_variables")) {
          /*
           * getStringValue() yields null for a non-string attribute; such
           * an attribute cannot name any child variables, so skip it
           * rather than fail with a NullPointerException.
           */
          String ancillaryVars = attr.getStringValue();
          if (ancillaryVars != null) {
            /*
             * Split on any run of whitespace so that tabs, newlines and
             * repeated blanks between IDs are tolerated.
             */
            varId2AncillaryVars.put(
                variable.getFullName(), ancillaryVars.trim().split("\\s+"));
          }
        }
      }
    }

    ucar.nc2.dt.GridDataset gridDataset = CdmUtils.getGridDataset(nc);
    List<GridVariableMetadata> vars = new ArrayList<>();

    /*
     * Store a map of component names. Key is the compound name, value
     * is a 2-element String array with x, y component IDs
     *
     * Also store a map of whether these components are really
     * eastward/northward, or whether they are locally u/v
     */
    Map<String, String[]> xyComponentPairs = new HashMap<>();
    Map<String, Boolean> xyNameToTrueEN = new HashMap<>();

    /*
     * Store a map of variable IDs to UncertML URLs. This will be used
     * to determine which components are mean/std/etc.
     *
     * TODO implement more than just Mean/SD
     */
    Map<String, String> varId2UncertMLRefs = new HashMap<>();

    /*
     * Here we store the parent variable IDs and their corresponding
     * title.
     */
    Map<String, String> parentVarId2Title = new HashMap<>();

    for (Gridset gridset : gridDataset.getGridsets()) {
      GridCoordSystem coordSys = gridset.getGeoCoordSystem();
      HorizontalGrid hDomain = CdmUtils.createHorizontalGrid(coordSys);
      VerticalAxis zDomain = CdmUtils.createVerticalAxis(coordSys);
      TimeAxis tDomain = CdmUtils.createTimeAxis(coordSys);

      /*
       * Create a VariableMetadata object for each GridDatatype
       */
      for (GridDatatype grid : gridset.getGrids()) {
        VariableDS variable = grid.getVariable();
        String varId = variable.getFullName();
        String name = getVariableName(variable);

        /*
         * If this is a parent variable for a stats collection, we
         * don't want it to be a normal variable as well.
         */
        if (varId2AncillaryVars.containsKey(varId)) {
          parentVarId2Title.put(varId, name);
          continue;
        }

        /*
         * If it is a child variable it is (potentially) referenced by
         * UncertML, so store its ID and the (possible) UncertML URI
         */
        for (Attribute attr : variable.getAttributes()) {
          if (attr.getFullName().equalsIgnoreCase("ref")) {
            varId2UncertMLRefs.put(varId, attr.getStringValue());
          }
        }

        Parameter parameter =
            new Parameter(
                varId,
                variable.getShortName(),
                variable.getDescription(),
                variable.getUnitsString(),
                name);
        vars.add(
            new GridVariableMetadata(
                variable.getFullName(), parameter, hDomain, zDomain, tDomain, true));

        if (name != null) {
          /*
           * Check for vector components.
           *
           * We could potentially add a check for zonal/meridional
           * here if required.
           */
          recordVectorComponent(name, varId, xyComponentPairs, xyNameToTrueEN);
        }
      }
    }

    CdmGridDataset cdmGridDataset =
        new CdmGridDataset(id, location, vars, CdmUtils.getOptimumDataReadingStrategy(nc));

    /*
     * Only compound names for which BOTH components were found become
     * vector plugins.
     */
    for (Entry<String, String[]> componentData : xyComponentPairs.entrySet()) {
      String commonName = componentData.getKey();
      String[] comps = componentData.getValue();
      if (comps[0] != null && comps[1] != null) {
        cdmGridDataset.addVariablePlugin(
            new VectorPlugin(comps[0], comps[1], commonName, xyNameToTrueEN.get(commonName)));
      }
    }

    /*
     * For each parent variable, find the children which UncertML marks as
     * mean and standard deviation. A plugin is only added when both exist.
     */
    for (Entry<String, String[]> statsCollection : varId2AncillaryVars.entrySet()) {
      String meanId = null;
      String stddevId = null;
      for (String statsVarId : statsCollection.getValue()) {
        String uncertRef = varId2UncertMLRefs.get(statsVarId);
        if (uncertRef == null) {
          continue;
        }
        if (uncertRef.equalsIgnoreCase("http://www.uncertml.org/statistics/mean")) {
          meanId = statsVarId;
        } else if (uncertRef.equalsIgnoreCase(
            "http://www.uncertml.org/statistics/standard-deviation")) {
          stddevId = statsVarId;
        }
      }
      if (meanId != null && stddevId != null) {
        MeanSDPlugin meanSDPlugin =
            new MeanSDPlugin(meanId, stddevId, parentVarId2Title.get(statsCollection.getKey()));
        cdmGridDataset.addVariablePlugin(meanSDPlugin);
      }
    }

    return cdmGridDataset;
  } finally {
    CdmUtils.closeDataset(nc);
  }
}

/**
 * Records {@code varId} as the x- or y-component of a vector quantity if {@code name} follows
 * one of the recognised naming patterns; does nothing otherwise.
 *
 * <p>Names containing {@code eastward_}/{@code northward_} are flagged as true
 * eastward/northward components; names matching {@code u-...component}/{@code v-...component}
 * are flagged as not being so. The flag is stored only when a compound name is first seen.
 *
 * @param name the (non-null) variable name to test
 * @param varId the ID of the variable carrying that name
 * @param xyComponentPairs compound name to 2-element array of x, y component IDs; updated
 * @param xyNameToTrueEN compound name to "components are true eastward/northward"; updated
 */
private static void recordVectorComponent(
    String name,
    String varId,
    Map<String, String[]> xyComponentPairs,
    Map<String, Boolean> xyNameToTrueEN) {
  final String compoundName;
  final int componentIndex;
  final boolean trueEastNorth;
  if (name.contains("eastward_")) {
    compoundName = name.replaceFirst("eastward_", "");
    componentIndex = 0;
    trueEastNorth = true;
  } else if (name.contains("northward_")) {
    compoundName = name.replaceFirst("northward_", "");
    componentIndex = 1;
    trueEastNorth = true;
  } else if (name.matches("u-.*component")) {
    compoundName = name.replaceFirst("u-(.*)component", "$1");
    componentIndex = 0;
    trueEastNorth = false;
  } else if (name.matches("v-.*component")) {
    compoundName = name.replaceFirst("v-(.*)component", "$1");
    componentIndex = 1;
    trueEastNorth = false;
  } else {
    return;
  }

  String[] components = xyComponentPairs.get(compoundName);
  if (components == null) {
    components = new String[2];
    xyComponentPairs.put(compoundName, components);
    xyNameToTrueEN.put(compoundName, trueEastNorth);
  }
  /*
   * By doing this, we will end up with the merged coverage
   */
  components[componentIndex] = varId;
}
/**
 * Opens the NetCDF dataset at the given location, using the dataset cache if {@code location}
 * represents an NcML aggregation. We cannot use the cache for OPeNDAP or single NetCDF files
 * because the underlying data may have changed and the NetcdfDataset cache may cache a dataset
 * forever. In the case of NcML we rely on the fact that server administrators ought to have set a
 * "recheckEvery" parameter for NcML aggregations that may change with time. It is desirable to
 * use the dataset cache for NcML aggregations because they can be time-consuming to assemble and
 * we don't want to do this every time a map is drawn.
 *
 * <p>Locations starting with {@code dods://}, {@code http://} or {@code https://} are opened
 * directly as remote datasets. Anything else is treated as a local glob expression: a single
 * match is opened directly, while multiple matches are joined along their time dimension using
 * a generated NcML {@code joinExisting} aggregation. The generated NcML is kept in
 * {@code ncmlString} and reused on subsequent calls.
 *
 * @param location The location of the data: a local NetCDF file, an NcML aggregation file or an
 *     OPeNDAP location, {@literal i.e.} anything that can be passed to
 *     NetcdfDataset.openDataset(location).
 * @return a {@link NetcdfDataset} object for accessing the data at the given location.
 * @throws IOException if there was an error reading from the data source.
 * @throws EdalException if the location does not match any existing files, or if it matches
 *     several files in which no time dimension can be found to join them along.
 */
private NetcdfDataset openAndAggregateDataset(String location)
    throws IOException, EdalException {
  if (location.startsWith("dods://")
      || location.startsWith("http://")
      || location.startsWith("https://")) {
    /*
     * We have a remote dataset
     */
    return CdmUtils.openDataset(location);
  }

  /*
   * We have a local dataset
   */
  List<File> files;
  try {
    files = CdmUtils.expandGlobExpression(location);
  } catch (NullPointerException e) {
    log.error("NPE processing location: " + location, e);
    throw e;
  }
  if (files.isEmpty()) {
    throw new EdalException(
        "The location " + location + " doesn't refer to any existing files.");
  }
  if (files.size() == 1) {
    return CdmUtils.openDataset(files.get(0).getAbsolutePath());
  }

  /*
   * We have multiple files in a glob expression. We write some NcML and
   * use the NetCDF aggregation libs to parse this into an aggregated
   * dataset.
   *
   * If we have already generated the NcML on a previous call, just use
   * that.
   */
  if (ncmlString == null) {
    ncmlString = buildAggregationNcml(files);
  }
  return NcMLReader.readNcML(new StringReader(ncmlString), null);
}

/**
 * Builds an NcML document which joins the given files along their time dimension.
 *
 * <p>The supplied list is sorted in place by the first value of each file's time variable.
 *
 * @param files the files to aggregate; must contain at least one file and be modifiable
 * @return the NcML document as a string
 * @throws IOException if the first file cannot be read or closed
 * @throws EdalException if no time dimension can be found in the first file
 */
private String buildAggregationNcml(List<File> files) throws IOException, EdalException {
  final String timeDimName = findTimeDimensionName(files.get(0));
  if (timeDimName == null) {
    throw new EdalException("Cannot join multiple files without time dimensions");
  }

  /*
   * We can't assume that the glob expression will have returned the
   * files in time order.
   *
   * We could assume that alphabetical == time ordered (and for properly
   * named files it will - but let's not rely on our users having
   * sensible naming conventions...
   *
   * Read the first value of the time variable from each file ONCE, up
   * front, rather than re-opening two files for every comparison the
   * sort makes.
   */
  final Map<File, Long> firstTimeValues = new HashMap<File, Long>();
  for (File file : files) {
    firstTimeValues.put(file, readFirstTimeValue(file, timeDimName));
  }
  Collections.sort(
      files,
      new Comparator<File>() {
        @Override
        public int compare(File ncFile1, File ncFile2) {
          Long time1 = firstTimeValues.get(ncFile1);
          Long time2 = firstTimeValues.get(ncFile2);
          if (time1 != null && time2 != null) {
            return Long.compare(time1, time2);
          }
          /*
           * There was a problem reading the data from at least one of
           * the files. Sort alphanumerically by filename and hope for
           * the best...
           *
           * If the error is a genuine problem, it'll show up as soon as
           * we try and aggregate.
           */
          return ncFile1.getAbsolutePath().compareTo(ncFile2.getAbsolutePath());
        }
      });

  /*
   * Now create the NcML string. Attribute values are escaped so that
   * paths containing XML special characters still give well-formed NcML.
   */
  StringBuilder ncml = new StringBuilder();
  ncml.append("<netcdf xmlns=\"http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2\">");
  ncml.append("<aggregation dimName=\"")
      .append(escapeXmlAttribute(timeDimName))
      .append("\" type=\"joinExisting\">");
  for (File file : files) {
    ncml.append("<netcdf location=\"")
        .append(escapeXmlAttribute(file.getAbsolutePath()))
        .append("\"/>");
  }
  ncml.append("</aggregation>");
  ncml.append("</netcdf>");
  return ncml.toString();
}

/**
 * Finds the name of the time dimension in the given file: the dimension of a co-ordinate
 * variable whose {@code units} attribute contains {@code " since "}. If several variables
 * qualify, the last one encountered is used.
 *
 * @param file the file to inspect
 * @return the name of the time dimension, or {@code null} if none was found
 * @throws IOException if the file cannot be read or closed
 * @throws EdalException if the file cannot be opened
 */
private String findTimeDimensionName(File file) throws IOException, EdalException {
  NetcdfDataset first = openAndAggregateDataset(file.getAbsolutePath());
  try {
    String timeDimName = null;
    for (Variable variable : first.getVariables()) {
      if (!variable.isCoordinateVariable()) {
        continue;
      }
      for (Attribute attr : variable.getAttributes()) {
        if (!attr.getFullName().equalsIgnoreCase("units")) {
          continue;
        }
        /* A non-string units attribute has a null string value */
        String units = attr.getStringValue();
        if (units != null && units.contains(" since ")) {
          /*
           * This is the time dimension. Since this is a co-ordinate
           * variable, there is only 1 dimension
           */
          timeDimName = variable.getDimension(0).getFullName();
        }
      }
    }
    return timeDimName;
  } finally {
    /* Close even if inspecting the variables failed */
    first.close();
  }
}

/**
 * Reads the first value of the named variable from the given file.
 *
 * @param ncFile the file to read from
 * @param timeVarName the name of the variable to read
 * @return the first value as a long, or {@code null} if it could not be read for any reason
 */
private Long readFirstTimeValue(File ncFile, String timeVarName) {
  NetcdfFile nc = null;
  try {
    nc = NetcdfFile.open(ncFile.getAbsolutePath());
    Variable timeVar = nc.findVariable(timeVarName);
    if (timeVar == null) {
      return null;
    }
    return timeVar.read().getLong(0);
  } catch (Exception ignored) {
    /*
     * Deliberately best-effort: this catches all exceptions because,
     * however the read fails, falling back to filename ordering is
     * still our best option.
     */
    return null;
  } finally {
    if (nc != null) {
      try {
        nc.close();
      } catch (IOException e) {
        log.error("Problem closing netcdf file", e);
      }
    }
  }
}

/**
 * Escapes the characters which are special inside a double-quoted XML attribute value.
 *
 * @param value the raw attribute value
 * @return the value with {@code & < > " '} replaced by their predefined XML entities
 */
private static String escapeXmlAttribute(String value) {
  /* '&' must be replaced first so the other entities are not re-escaped */
  return value
      .replace("&", "&amp;")
      .replace("<", "&lt;")
      .replace(">", "&gt;")
      .replace("\"", "&quot;")
      .replace("'", "&apos;");
}