/**
 * Loads a feature set using a temporary features descriptor.
 *
 * <p>The features description held by {@code parameter} is replaced by {@code descriptor} while
 * the feature set is read, and the previous description is put back afterwards. The restore is
 * done in a {@code finally} clause so that a failing read does not leave the shared
 * {@code parameter} object carrying the temporary descriptor.
 *
 * @param parameter the parameter whose input-feature description is temporarily overridden
 * @param clusterSet the cluster set handed to {@code MainTools.readFeatureSet}
 * @param descriptor the features descriptor to use for this read only
 * @return the audio feature set read with {@code descriptor}
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws DiarizationException the diarization exception
 */
public static AudioFeatureSet loadFeature(
    Parameter parameter, ClusterSet clusterSet, String descriptor)
    throws IOException, DiarizationException {
  String oldDescriptor = parameter.getParameterInputFeature().getFeaturesDescriptorAsString();
  parameter.getParameterInputFeature().setFeaturesDescription(descriptor);
  try {
    return MainTools.readFeatureSet(parameter, clusterSet);
  } finally {
    // Always restore the caller's descriptor, even when the read fails.
    parameter.getParameterInputFeature().setFeaturesDescription(oldDescriptor);
  }
}
/**
 * Builds a {@code Parameter} from the command-line arguments.
 *
 * <p>A default features description is installed first; {@code readParameters} is then called
 * with {@code args} on the same object.
 *
 * @param args the command-line arguments
 * @return the freshly created parameter object
 */
public static Parameter getParameter(String[] args) {
  // Features description installed before the arguments are read.
  String defaultFeaturesDescription = "audio2sphinx,1:1:0:0:0:0,13,0:0:0:0";

  Parameter created = new Parameter();
  created.getParameterInputFeature().setFeaturesDescription(defaultFeaturesDescription);
  created.readParameters(args);
  return created;
}
/** * Info. * * @param parameter the parameter * @param programName the program name * @throws IllegalArgumentException the illegal argument exception * @throws IllegalAccessException the illegal access exception * @throws InvocationTargetException the invocation target exception */ public static void info(Parameter parameter, String programName) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { if (parameter.help) { logger.config(parameter.getSeparator2()); logger.config("name = " + programName); logger.config(parameter.getSeparator()); parameter.logShow(); parameter.getParameterInputFeature().logAll(); // fInMask logger.config(parameter.getSeparator()); parameter.getParameterSegmentationInputFile().logAll(); // sInMask parameter.getParameterSegmentationInputFile2().logAll(); // sInMask parameter.getParameterSegmentationOutputFile().logAll(); // sOutMask logger.config(parameter.getSeparator()); parameter.getParameterDiarization().logAll(); logger.config(parameter.getSeparator()); } }
/** * Tun ester2 speaker clr clustering. * * @param referenceClusterSet the reference cluster set * @param uemClusterSet the uem cluster set * @param partialKey the partial key * @param method the method * @param clusterSetBase the cluster set base * @param clusterSet the cluster set * @param featureSet the feature set * @param parameter the parameter * @return the diarization result list * @throws Exception the exception */ public DiarizationResultList tunEster2SpeakerCLRClustering( ClusterSet referenceClusterSet, ClusterSet uemClusterSet, String partialKey, String method, ClusterSet clusterSetBase, ClusterSet clusterSet, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String oldSpeechDetectorMethod = parameter.getParameterInputFeature().getSpeechMethodAsString(); double oldSpeechDetectorThreshold = parameter.getParameterInputFeature().getSpeechThreshold(); String oldModelKind = parameter.getParameterModel().getModelKindAsString(); int oldNumberOfComponent = parameter.getParameterModel().getNumberOfComponents(); String oldMethod = parameter.getParameterClustering().getMethodAsString(); double oldThreshold = parameter.getParameterClustering().getThreshold(); String oldEMControl = parameter.getParameterEM().getEMControl(); int oldNTop = parameter.getParameterTopGaussian().getScoreNTop(); boolean oldSaveAll = parameter.getParameterDiarization().isSaveAllStep(); DiarizationResultList localResult = new DiarizationResultList(cMin, cMax, mult); DiarizationError computeError = new DiarizationError(referenceClusterSet, uemClusterSet); double prevScore = cMin; // ---- Begin NEW v 1.13 --- parameter.getParameterInputFeature().setSpeechMethod("E"); parameter.getParameterInputFeature().setSpeechThreshold(0.1); // ---- End NEW v 1.13 --- String FeatureFormat = "featureSetTransformation"; String dir = "ester2"; InputStream ubmInputStream = getClass().getResourceAsStream(dir + "/ubm.gmm"); GMMArrayList ubmVect = MainTools.readGMMContainer(ubmInputStream, 
parameter.getParameterModel()); GMM ubm = ubmVect.get(0); /* * int nbCep = 16; logger.info("---> nbCep:"+nbCep); FeatureSet featureSet2 = loadFeature(parameter, clusterSet, "audio16kHz2sphinx,1:3:2:0:0:0,"+nbCep+",1:1:300:4"); logger.info("---> nbCep:"+nbCep); //logger.fine("*** nbFeaturesNorm:" + * clusterSetBase.getLength()); */ // A tester AudioFeatureSet featureSet2 = loadFeature(featureSet, parameter, clusterSet, FeatureFormat + ",1:3:2:0:0:0,13,1:1:300:4"); // v5.14 // FeatureSet featureSet2 = loadFeature(featureSet, parameter, clusterSet, FeatureFormat // + ",1:3:2:0:0:0,13,1:1:0:0"); parameter.getParameterModel().setModelKind("DIAG"); parameter.getParameterModel().setNumberOfComponents(ubm.getNbOfComponents()); parameter.getParameterClustering().setMethod(method); // ---- Begin NEW v 1.19 --- // parameter.getParameterEM().setEMControl("1,1,0.01"); // parameter.getParameterClustering().setThreshold(0); // ---- End NEW v 1.19 --- parameter.getParameterClustering().setThreshold(cMax); parameter.getParameterEM().setEMControl("1,5,0.01"); parameter.getParameterTopGaussian().setScoreNTop(5); parameter.getParameterDiarization().setSaveAllStep(false); CLRHClustering clustering = new CLRHClustering(clusterSet, featureSet2, parameter, ubm); // int nbCluster = clusterSet.clusterGetSize(); // logger.info("initialise clustering CLR clusterSet:"+clusterSet); clustering.initialize(); double score = clustering.getScoreOfCandidatesForMerging(); DiarizationResult error = computeError.scoreOfMatchedSpeakers(clustering.getClusterSet()); double errorRate = error.getErrorRate(); localResult.setResult(prevScore, score, error); // prevScore = Math.max(score, prevScore); logger.fine( "first " + parameter.show + " key=" + partialKey + " clrScore=" + score + " clrErrorRate=" + errorRate + " clrSize=" + clustering.getSize() + "/" + referenceClusterSet.clusterGetSize()); while ((score < cMax) && (clustering.getSize() > 1)) { localResult.setResult(prevScore, score, error); prevScore = 
Math.max(score, prevScore); clustering.mergeCandidates(); // -- start V5.16 -- // logger.info("--> Decoding"); // ClusterSet decodeClusterSet = MDecode.make(featureSet2, clustering.getClusterSet(), // clustering.getGmmList(), parameter); // logger.info("--> Clustering"); // featureSet2 = loadFeature(featureSet, parameter, decodeClusterSet, FeatureFormat // + ",1:3:2:0:0:0,13,1:1:300:4"); // clustering = new CLRHClustering(decodeClusterSet, featureSet2, parameter, ubm); // clustering.initialize(); // -- end V5.16 -- score = clustering.getScoreOfCandidatesForMerging(); error = computeError.scoreOfMatchedSpeakers(clustering.getClusterSet()); errorRate = error.getErrorRate(); // localResult.setResult(prevScore, score, error); // prevScore = Math.max(score, prevScore); logger.fine( parameter.show + " key=" + partialKey + " clrScore=" + score + " clrErrorRate=" + errorRate + " clrSize=" + clustering.getSize() + "/" + referenceClusterSet.clusterGetSize()); } localResult.setResult(prevScore, score, error); localResult.setResult(score, cMax, error); logger.finer(parameter.show + " key=" + partialKey + " resultat du fichier"); localResult.log("partial result: " + parameter.show + " " + partialKey); clustering.reset(); parameter.getParameterModel().setNumberOfComponents(oldNumberOfComponent); parameter.getParameterModel().setModelKind(oldModelKind); parameter.getParameterClustering().setMethod(oldMethod); parameter.getParameterClustering().setThreshold(oldThreshold); parameter.getParameterEM().setEMControl(oldEMControl); parameter.getParameterTopGaussian().setScoreNTop(oldNTop); parameter.getParameterInputFeature().setSpeechMethod(oldSpeechDetectorMethod); parameter.getParameterInputFeature().setSpeechThreshold(oldSpeechDetectorThreshold); parameter.getParameterDiarization().setSaveAllStep(oldSaveAll); return localResult; }
/** * Tun ester2 diarization. * * @param parameter the parameter * @param clusterSet the cluster set * @return the tree map * @throws DiarizationException the diarization exception * @throws Exception the exception */ public TreeMap<String, DiarizationResultList> tunEster2Diarization( Parameter parameter, ClusterSet clusterSet) throws DiarizationException, Exception { TreeMap<String, DiarizationResultList> result = new TreeMap<String, DiarizationResultList>(); // double paramThr = parameter.getParameterClustering().getThreshold(); lMin = parameter.getParameterDiarization().getThreshold("l"); lMax = parameter.getParameterDiarization().getMaxThreshold("l"); hMin = parameter.getParameterDiarization().getThreshold("h"); hMax = parameter.getParameterDiarization().getMaxThreshold("h"); dMin = parameter.getParameterDiarization().getThreshold("d"); dMax = parameter.getParameterDiarization().getMaxThreshold("d"); cMin = parameter.getParameterDiarization().getThreshold("c"); cMax = parameter.getParameterDiarization().getMaxThreshold("c"); String featureDesc = parameter.getParameterInputFeature().getFeaturesDescriptorAsString(); AudioFeatureSet featureSet = null; ClusterSet clustersSegInit = null; if (parameter.getParameterDiarization().isLoadInputSegmentation() == false) { featureSet = loadFeature(parameter, clusterSet, featureDesc); featureSet.setCurrentShow(parameter.show); int nbFeatures = featureSet.getNumberOfFeatures(); clusterSet.getFirstCluster().firstSegment().setLength(nbFeatures); clustersSegInit = sanityCheck(clusterSet, featureSet, parameter); } else { featureSet = loadFeature(parameter, clusterSet, featureDesc); featureSet.setCurrentShow(parameter.show); clustersSegInit = sanityCheck(clusterSet, featureSet, parameter); featureSet = loadFeature(parameter, clustersSegInit, featureDesc); featureSet.setCurrentShow(parameter.show); } // seg IRIT // ClusterSet clustersSegSave = clustersSegInit; // seg IRIT ClusterSet referenceClusterSet = 
MainTools.readTheSecondClusterSet(parameter); ClusterSet uemClusterSet = MainTools.readThe3rdClusterSet(parameter); if (parameter.getParameterDiarization().isLastStepOnly()) { String key = "l=" + lMin + " h=" + hMin + " d=" + dMin; DiarizationResultList values = null; if (parameter.getParameterDiarization().isCEClustering() == false) { logger.warning(" nothing to do isCEClustering == false"); } else { values = tunEster2SpeakerCLRClustering( referenceClusterSet, uemClusterSet, key, "ce", clusterSet, clusterSet, featureSet, parameter); } result.put(key, values); return result; } ClusterSet clustersSegSave = segmentation("GLR", "FULL", clustersSegInit, featureSet, parameter); for (double l = lMin; l <= lMax; l += 0.5) { ClusterSet clustersSeg = clustersSegSave.clone(); logger.finest("clustering l=" + l); ClusterSet clustersLClust = clusteringLinear(l, clustersSeg, featureSet, parameter); // ---- Begin NEW v 1.14 --- for (double h = hMin; h <= hMax; h += 0.5) { // for (double h = hMin; h <= hMax; h += 0.2) { // ---- end NEW v 1.14 --- // if (h > l) { ClusterSet clustersHClust = clustering(h, clustersLClust, featureSet, parameter); for (double d = dMin; d <= dMax; d += 50) { ClusterSet clustersDClust = decode(8, d, clustersHClust, featureSet, parameter); // double error = DiarizationError.scoreOfMatchedSpeakers(referenceClusterSet, // clustersDClust); ClusterSet clustersSplitClust = speech( "10,10,50", clusterSet, clustersSegInit, clustersDClust, featureSet, parameter); ClusterSet clustersGender = gender(clusterSet, clustersSplitClust, featureSet, parameter); String key = "l=" + l + " h=" + h + " d=" + d; DiarizationResultList values = null; if (parameter.getParameterDiarization().isCEClustering() == false) { values = new DiarizationResultList(0, 0, 1); DiarizationError computeError = new DiarizationError(referenceClusterSet, uemClusterSet); DiarizationResult error = computeError.scoreOfMatchedSpeakers(clustersGender); values.setResult(0, 0, error); 
logger.finer(parameter.show + " key=" + key + " resultat du fichier"); values.log("partial result: " + parameter.show + " " + key); } else { // V4.19 = CLUST_H_BIC_GMM_MAP // values = tunEster2SpeakerCLRClustering(referenceClusterSet, key, "bicgmmmap", // clustersGender, clustersGender, featureSet, parameter); // V5.16 = ce_d // values = tunEster2SpeakerCLRClustering(referenceClusterSet, key, "ce_d", // clustersGender, clustersGender, featureSet, parameter); values = tunEster2SpeakerCLRClustering( referenceClusterSet, uemClusterSet, key, "ce", clustersGender, clustersGender, featureSet, parameter); } if (result.containsKey(key)) { result.get(key).addResultArray(values); } else { result.put(key, values); } } // } } } return result; }
/** * Ester2 version. * * @param parameter the parameter * @throws DiarizationException the diarization exception * @throws Exception the exception */ public void ester2Version(Parameter parameter) throws DiarizationException, Exception { // ** Caution this system is developed using Sphinx MFCC computed with legacy mode ClusterSet referenceClusterSet = null; if (!parameter.getParameterSegmentationInputFile2().getMask().equals("")) { referenceClusterSet = MainTools.readTheSecondClusterSet(parameter); } ClusterSet uemClusterSet = null; if (!parameter.getParameterSegmentationInputFile3().getMask().equals("")) { referenceClusterSet = MainTools.readThe3rdClusterSet(parameter); } ParameterBNDiarization parameterDiarization = parameter.getParameterDiarization(); // ** mask for the output of the segmentation file ClusterSet clusterSet = initialize(parameter); // ** load the features, sphinx format (13 MFCC with C0) or compute it form a wave file AudioFeatureSet featureSet = loadFeature( parameter, clusterSet, parameter.getParameterInputFeature().getFeaturesDescriptorAsString()); featureSet.setCurrentShow(parameter.show); int nbFeatures = featureSet.getNumberOfFeatures(); if (parameter.getParameterDiarization().isLoadInputSegmentation() == false) { clusterSet.getFirstCluster().firstSegment().setLength(nbFeatures); } // clusterSet.debug(3); ClusterSet clustersSegInit = sanityCheck(clusterSet, featureSet, parameter); ClusterSet clustersSeg = segmentation("GLR", "FULL", clustersSegInit, featureSet, parameter); // Seg IRIT // ClusterSet clustersSegInit = clusterSet; // ClusterSet clustersSeg = clusterSet; // Seg IRIT ClusterSet clustersLClust = clusteringLinear( parameterDiarization.getThreshold("l"), clustersSeg, featureSet, parameter); ClusterSet clustersHClust = clustering(parameterDiarization.getThreshold("h"), clustersLClust, featureSet, parameter); // MainTools.writeClusterSet(parameter, clustersHClust, false); ClusterSet clustersDClust = decode(8, 
parameterDiarization.getThreshold("d"), clustersHClust, featureSet, parameter); ClusterSet clustersSplitClust = speech("10,10,50", clusterSet, clustersSegInit, clustersDClust, featureSet, parameter); ClusterSet clustersGender = gender(clusterSet, clustersSplitClust, featureSet, parameter); if (parameter.getParameterDiarization().isCEClustering()) { ClusterSet clustersCLR = speakerClustering( parameterDiarization.getThreshold("c"), "ce", clustersSegInit, clustersGender, featureSet, parameter); MainTools.writeClusterSet(parameter, clustersCLR, false); if (referenceClusterSet != null) { DiarizationError computeError = new DiarizationError(referenceClusterSet, uemClusterSet); computeError.scoreOfMatchedSpeakers(clustersCLR); } } else { MainTools.writeClusterSet(parameter, clustersGender, false); } }
/** * Speaker clustering. * * @param threshold the threshold * @param method the method * @param clusterSetBase the cluster set base * @param clustersSet the clusters set * @param featureSet the feature set * @param parameter the parameter * @return the cluster set * @throws Exception the exception */ public ClusterSet speakerClustering( double threshold, String method, ClusterSet clusterSetBase, ClusterSet clustersSet, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String mask = parameter.getParameterSegmentationOutputFile().getMask(); String oldMethod = parameter.getParameterClustering().getMethodAsString(); double oldThreshold = parameter.getParameterClustering().getThreshold(); String oldEMControl = parameter.getParameterEM().getEMControl(); int oldNTop = parameter.getParameterTopGaussian().getScoreNTop(); String oldSpeechDetectorMethod = parameter.getParameterInputFeature().getSpeechMethodAsString(); double oldSpeechDetectorThreshold = parameter.getParameterInputFeature().getSpeechThreshold(); // ** bottom up hierarchical classification using GMMs // ** one for each cluster, trained by MAP adaptation of a UBM composed of the fusion of // 4x128GMM // ** the feature normalization use feature mapping technique, after the cluster frames are // centered and reduced String dir = "ester2"; InputStream ubmInputStream = getClass().getResourceAsStream(dir + "/ubm.gmm"); GMMArrayList ubmVect = MainTools.readGMMContainer(ubmInputStream, parameter.getParameterModel()); GMM ubm = ubmVect.get(0); // int nbCep = ubm.getDimension() + 1; String FeatureFormat = "featureSetTransformation"; parameter.getParameterInputFeature().setSpeechMethod("E"); parameter.getParameterInputFeature().setSpeechThreshold(0.1); AudioFeatureSet featureSet2 = loadFeature( featureSet, parameter, clustersSet, FeatureFormat + ",1:3:2:0:0:0,13,1:1:300:4"); parameter.getParameterClustering().setMethod(method); parameter.getParameterClustering().setThreshold(threshold); 
parameter.getParameterEM().setEMControl("1,5,0.01"); parameter.getParameterTopGaussian().setScoreNTop(5); // ---- Begin NEW v 1.13 --- // if (parameter.parameterSpeechDetector.useSpeechDetection() == true) { // MSpeechDetector.EnergyThresholdMethod(clustersSet, featureSet, parameter); // } // ---- End NEW v 1.13 --- boolean saveAll = parameter.getParameterDiarization().isSaveAllStep(); parameter.getParameterDiarization().setSaveAllStep(false); ClusterSet clustersCLR = MClust.make(featureSet2, clustersSet, parameter, ubm); parameter.getParameterDiarization().setSaveAllStep(saveAll); parameter.getParameterSegmentationOutputFile().setMask(mask); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".c.seg"); MainTools.writeClusterSet(parameter, clustersCLR, false); } parameter.getParameterSegmentationOutputFile().setMask(mask); parameter.getParameterClustering().setMethod(oldMethod); parameter.getParameterClustering().setThreshold(oldThreshold); parameter.getParameterEM().setEMControl(oldEMControl); parameter.getParameterTopGaussian().setScoreNTop(oldNTop); parameter.getParameterInputFeature().setSpeechMethod(oldSpeechDetectorMethod); parameter.getParameterInputFeature().setSpeechThreshold(oldSpeechDetectorThreshold); return clustersCLR; }