/**
 * Inspects a candidate group of files and, when it holds enough required members, wraps the
 * group in a new {@link ClumpSource} tagged with this identifier's format.
 *
 * <p>A group qualifies when its must-have count is at least {@code minMustHavesToIdentify}.
 * For a qualifying group, every member flagged as must-have or may-have (and every other
 * member too when {@code includeUnmatchedFromGroup} is set) is added to the clump as a child,
 * and the member's match info is updated to hold the Source returned by that addition.
 *
 * @param fileGroup candidate group of related Sources to evaluate
 * @param jhove2 framework object whose SourceFactory supplies the ClumpSource
 * @return ClumpSource carrying a presumptive FormatIdentification for this group, or
 *     {@code null} if the group has too few must-have members
 * @throws JHOVE2Exception if the framework's SourceFactory is null
 */
protected ClumpSource recognizeGroupedSource(GlobPathMatchInfoGroup fileGroup, JHOVE2 jhove2)
        throws JHOVE2Exception {
    // Guard clause: not enough required files, so this group is not an instance of the format.
    if (fileGroup.getMustHaveCount() < this.minMustHavesToIdentify) {
        return null;
    }

    FormatIdentification identification =
            new FormatIdentification(
                    this.format, GLOB_PATH_CONFIDENCE, this.getReportableIdentifier());

    if (jhove2.getSourceFactory() == null) {
        throw new JHOVE2Exception("JHOVE2 SourceFactory is null");
    }

    ClumpSource clump = jhove2.getSourceFactory().getClumpSource(jhove2);
    clump = (ClumpSource) clump.addPresumptiveFormat(identification);

    for (GlobPathMatchInfo member : fileGroup.getSourceMatchInfoList()) {
        // Skip members that are neither required nor optional unless unmatched files
        // are explicitly configured to be swept into the clump as well.
        if (!member.isMustHave() && !member.isMayHave() && !this.includeUnmatchedFromGroup) {
            continue;
        }
        // Keep the match info pointing at whatever Source addChildSource hands back.
        Source attached = clump.addChildSource(member.getSource());
        member.setSource(attached);
    }
    return clump;
}
/** * Constructs candidate instances of an aggregate format by grouping together all children of * source parameter that match fileGroupingToken * * @param source Source object whose child Sources are to be explored for groups constituting * instances of a format * @return Collection of GlobPathMatchInfoGroup objects, each of which contains a list of likely * related Sources comprising an instance of a Format, and indications as to whether or not * each Source in the group is a required or optional or unspecified component of that Format * instances * @throws JHOVE2Exception */ protected Collection<GlobPathMatchInfoGroup> groupSources(Source source) throws JHOVE2Exception { HashMap<String, GlobPathMatchInfoGroup> groupMap = new HashMap<String, GlobPathMatchInfoGroup>(); for (Source childSource : source.getChildSources()) { File sourceFile = childSource.getFile(); if (sourceFile != null) { String filePath = childSource.getFile().getPath(); // does the Source file path match the pattern that indicates a related file? 
Matcher m = this.fileGroupingPattern.matcher(filePath); if (m.matches()) { // might have more than one instance of a format in the Source, so // we have to group related files together String groupString = null; String mustHaveString = null; String mayHaveString = null; try { // get the value of the capture group which is the key to a format instance // (group of files) groupString = m.group(this.fileGroupingCaptureGroupIndex); // get the value of the capture group that indicates a file in the group // is one of the files required by the format definition mustHaveString = m.group(this.mustHaveCaptureGroupIndex); // get the value of the capture group that indicates a file in the group // is one of the files considered optional by the format definition mayHaveString = m.group(this.mayHaveCaptureGroupIndex); } catch (IllegalStateException ise) { // should not occur, we were inside if (m.matches()) statement throw new JHOVE2Exception( "Exception thrown grouping patterns: Check configuration", ise); } catch (IndexOutOfBoundsException iob) { throw new JHOVE2Exception( "Exception thrown grouping patterns: Check configuration", iob); } GlobPathMatchInfo fileInfo = new GlobPathMatchInfo(childSource); boolean matchesMustHaves = false; Matcher m2 = null; if (this.mustHavePattern != null) { m2 = this.mustHavePattern.matcher(mustHaveString); matchesMustHaves = m2.matches(); } fileInfo.setMustHave(matchesMustHaves); boolean matchesMayHaves = false; if (this.mayHavePattern != null) { m2 = this.mayHavePattern.matcher(mayHaveString); matchesMayHaves = m2.matches(); } fileInfo.setMayHave(matchesMayHaves); GlobPathMatchInfoGroup infoGroup; // is this the first occurrence of grouping key? 
if (!(groupMap.containsKey(groupString))) { // if so, add grouping key and new GlobPathMatchInfoGroup to groupMaP infoGroup = new GlobPathMatchInfoGroup(); infoGroup.setGroupKey(groupString); groupMap.put(groupString, infoGroup); } else { // otherwise just retrieve infoGroup = groupMap.get(groupString); } // add information about current Source to list associated with this grouping key infoGroup.getSourceMatchInfoList().add(fileInfo); // increment counter information associated with this grouping key if (matchesMustHaves) { infoGroup.setMustHaveCount(infoGroup.getMustHaveCount() + 1); } if (matchesMayHaves) { infoGroup.setMayHaveCount(infoGroup.getMayHaveCount() + 1); } if (!matchesMustHaves && !matchesMayHaves) { infoGroup.setUnmatchedCount(infoGroup.getUnmatchedCount() + 1); } } // end if (m.matches()){ } // end if sourceFile != null } // end for (Source childSource:source.getChildSources()) // we don't need the keys to the map any more; just return the values return groupMap.values(); }