/**
 * Scores the clusters against the GMMs stored in the {@code ester2/gender.gmms} class-path
 * resource and returns the cluster set produced by {@code MScore.make}.
 *
 * <p>For the duration of the call the score parameters {@code byCluster} and {@code gender} are
 * forced to {@code true}. When the diarization parameter {@code saveAllStep} is set, the result is
 * also written with the output mask suffixed by {@code ".g.seg"}. The original mask and score
 * flags are restored before returning, including when an exception is thrown.
 *
 * @param clusterSetBase the base cluster set (not used by this method)
 * @param clusterSet the cluster set to score; also used to load the transformed features
 * @param featureSet the feature set the transformed features are loaded from
 * @param parameter the shared parameters; temporarily modified and then restored
 * @return the cluster set returned by the scoring step
 * @throws Exception if loading the features, reading the models, scoring or writing fails
 */
public ClusterSet gender(
    ClusterSet clusterSetBase,
    ClusterSet clusterSet,
    AudioFeatureSet featureSet,
    Parameter parameter)
    throws Exception {
  String mask = parameter.getParameterSegmentationOutputFile().getMask();
  boolean oldByCluster = parameter.getParameterScore().isByCluster();
  boolean oldGender = parameter.getParameterScore().isGender();
  try {
    String featureFormat = "featureSetTransformation";
    AudioFeatureSet featureSet2 =
        loadFeature(featureSet, parameter, clusterSet, featureFormat + ",1:3:2:0:0:0,13,1:1:0:0");

    String dir = "ester2";
    InputStream genderInputStream = getClass().getResourceAsStream(dir + "/gender.gmms");
    GMMArrayList genderVector;
    try {
      genderVector = MainTools.readGMMContainer(genderInputStream, parameter.getParameterModel());
    } finally {
      // The stream is owned by this method; closing an already closed stream has no effect.
      if (genderInputStream != null) {
        genderInputStream.close();
      }
    }

    parameter.getParameterScore().setByCluster(true);
    parameter.getParameterScore().setGender(true);
    ClusterSet clustersGender = MScore.make(featureSet2, clusterSet, genderVector, null, parameter);

    if (parameter.getParameterDiarization().isSaveAllStep()) {
      parameter.getParameterSegmentationOutputFile().setMask(mask + ".g.seg");
      MainTools.writeClusterSet(parameter, clustersGender, false);
    }
    return clustersGender;
  } finally {
    // Undo every temporary change so a failure does not leak state to later steps.
    parameter.getParameterSegmentationOutputFile().setMask(mask);
    parameter.getParameterScore().setByCluster(oldByCluster);
    parameter.getParameterScore().setGender(oldGender);
  }
}
/** * Clustering. * * @param threshold the threshold * @param clusterSet the cluster set * @param featureSet the feature set * @param parameter the parameter * @return the cluster set * @throws Exception the exception */ public ClusterSet clustering( double threshold, ClusterSet clusterSet, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String mask = parameter.getParameterSegmentationOutputFile().getMask(); String oldMethod = parameter.getParameterClustering().getMethodAsString(); double oldThreshold = parameter.getParameterClustering().getThreshold(); String oldModelKind = parameter.getParameterModel().getModelKindAsString(); int oldNumberOfComponent = parameter.getParameterModel().getNumberOfComponents(); // --- begin NEW v 1.14 / 4.16 / 4.18 / 4.20--- parameter.getParameterClustering().setMethod("h"); // parameter.getParameterClustering().setMethod("sr"); // --- end NEW v 1.14 --- parameter.getParameterClustering().setThreshold(threshold); logger.finer( "method:" + parameter.getParameterClustering().getMethod() + " thr:" + parameter.getParameterClustering().getThreshold()); parameter.getParameterModel().setModelKind("FULL"); parameter.getParameterModel().setNumberOfComponents(1); ClusterSet clustersHClust = MClust.make(featureSet, clusterSet, parameter, null); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".h.seg"); MainTools.writeClusterSet(parameter, clustersHClust, false); } parameter.getParameterSegmentation().setMethod(oldMethod); parameter.getParameterModel().setNumberOfComponents(oldNumberOfComponent); parameter.getParameterModel().setModelKind(oldModelKind); parameter.getParameterClustering().setThreshold(oldThreshold); parameter.getParameterSegmentationOutputFile().setMask(mask); return clustersHClust; }
/**
 * Runs {@code MClust.make} with the clustering method {@code "l"}, the given threshold and a
 * single-component {@code "FULL"} model, and returns the resulting cluster set.
 *
 * <p>When the diarization parameter {@code saveAllStep} is set, the result is also written with
 * the output mask suffixed by {@code ".l.seg"}. The clustering method, clustering threshold, model
 * kind, number of components and output mask are restored before returning, including when an
 * exception is thrown.
 *
 * @param threshold the clustering threshold to use for this call
 * @param clusterSet the cluster set to cluster
 * @param featureSet the feature set
 * @param parameter the shared parameters; temporarily modified and then restored
 * @return the cluster set returned by the clustering step
 * @throws Exception if clustering or writing the result fails
 */
public ClusterSet clusteringLinear(
    double threshold, ClusterSet clusterSet, AudioFeatureSet featureSet, Parameter parameter)
    throws Exception {
  String mask = parameter.getParameterSegmentationOutputFile().getMask();
  String oldMethod = parameter.getParameterClustering().getMethodAsString();
  double oldThreshold = parameter.getParameterClustering().getThreshold();
  String oldModelKind = parameter.getParameterModel().getModelKindAsString();
  int oldNumberOfComponent = parameter.getParameterModel().getNumberOfComponents();
  try {
    parameter.getParameterModel().setModelKind("FULL");
    parameter.getParameterModel().setNumberOfComponents(1);
    parameter.getParameterClustering().setMethod("l");
    parameter.getParameterClustering().setThreshold(threshold);

    ClusterSet clustersLClust = MClust.make(featureSet, clusterSet, parameter, null);

    if (parameter.getParameterDiarization().isSaveAllStep()) {
      parameter.getParameterSegmentationOutputFile().setMask(mask + ".l.seg");
      MainTools.writeClusterSet(parameter, clustersLClust, false);
    }
    return clustersLClust;
  } finally {
    // The saved method belongs to the clustering parameters; it must be restored there and not
    // on the segmentation parameters.
    parameter.getParameterClustering().setMethod(oldMethod);
    parameter.getParameterModel().setNumberOfComponents(oldNumberOfComponent);
    parameter.getParameterModel().setModelKind(oldModelKind);
    parameter.getParameterClustering().setThreshold(oldThreshold);
    parameter.getParameterSegmentationOutputFile().setMask(mask);
  }
}
/**
 * Runs {@code MSeg.make} with the requested segmentation method and model kind, using a
 * single-component model, and returns the freshly filled cluster set.
 *
 * <p>When the diarization parameter {@code saveAllStep} is set, the result is also written with
 * the output mask suffixed by {@code ".s.seg"}. The segmentation method, number of components,
 * model kind and output mask are put back to their previous values before returning.
 *
 * @param method the segmentation method to use for this call
 * @param kind the model kind to use for this call
 * @param clusterSet the input cluster set
 * @param featureSet the feature set
 * @param parameter the shared parameters; temporarily modified and then restored
 * @return the cluster set filled by the segmentation step
 * @throws Exception if segmentation or writing the result fails
 */
public ClusterSet segmentation(
    String method,
    String kind,
    ClusterSet clusterSet,
    AudioFeatureSet featureSet,
    Parameter parameter)
    throws Exception {
  // Remember everything this method is about to override.
  String savedMask = parameter.getParameterSegmentationOutputFile().getMask();
  String savedMethod = parameter.getParameterSegmentation().getMethodAsString();
  int savedComponentCount = parameter.getParameterModel().getNumberOfComponents();
  String savedModelKind = parameter.getParameterModel().getModelKindAsString();

  // Configure the segmentation run.
  parameter.getParameterSegmentation().setMethod(method);
  parameter.getParameterModel().setNumberOfComponents(1);
  parameter.getParameterModel().setModelKind(kind);

  ClusterSet result = new ClusterSet();
  MSeg.make(featureSet, clusterSet, result, parameter);

  boolean saveIntermediate = parameter.getParameterDiarization().isSaveAllStep();
  if (saveIntermediate) {
    parameter.getParameterSegmentationOutputFile().setMask(savedMask + ".s.seg");
    MainTools.writeClusterSet(parameter, result, false);
  }

  // Put the previous configuration back.
  parameter.getParameterSegmentation().setMethod(savedMethod);
  parameter.getParameterModel().setNumberOfComponents(savedComponentCount);
  parameter.getParameterModel().setModelKind(savedModelKind);
  parameter.getParameterSegmentationOutputFile().setMask(savedMask);

  return result;
}
/** * Decode. * * @param nbComp the nb comp * @param threshold the threshold * @param clusterSet the cluster set * @param featureSet the feature set * @param parameter the parameter * @return the cluster set * @throws Exception the exception */ public ClusterSet decode( int nbComp, double threshold, ClusterSet clusterSet, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String mask = parameter.getParameterSegmentationOutputFile().getMask(); String oldModelKind = parameter.getParameterModel().getModelKindAsString(); int oldNumberOfComponent = parameter.getParameterModel().getNumberOfComponents(); // ** Train GMM for each cluster. // ** GMM is a 8 component gaussian with diagonal covariance matrix // ** one GMM = one speaker = one cluster // ** initialization of the GMMs : // ** - same global covariance for each gaussian, // ** - 1/8 for the weight, // ** - means are initialized with the mean of 10 successive vectors taken parameter.getParameterModel().setModelKind("DIAG"); parameter.getParameterModel().setNumberOfComponents(nbComp); GMMArrayList gmmInitVect = new GMMArrayList(clusterSet.clusterGetSize()); MTrainInit.make(featureSet, clusterSet, gmmInitVect, parameter); // ** EM training of the initialized GMM GMMArrayList gmmVect = new GMMArrayList(clusterSet.clusterGetSize()); MTrainEM.make(featureSet, clusterSet, gmmInitVect, gmmVect, parameter); // ** set the penalty to move from the state i to the state j, penalty to move from i to i is // equal to 0 parameter.getParameterDecoder().setDecoderPenalty(String.valueOf(threshold)); // ** make Viterbi decoding using the 8-GMM set // ** one state = one GMM = one speaker = one cluster ClusterSet clustersDClust = MDecode.make(featureSet, clusterSet, gmmVect, parameter); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".d.seg"); MainTools.writeClusterSet(parameter, clustersDClust, false); } // ** move the boundaries of the segment 
in low energy part of the signal ClusterSet clustersAdjClust = SAdjSeg.make(featureSet, clustersDClust, parameter); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".adj.seg"); MainTools.writeClusterSet(parameter, clustersAdjClust, false); } parameter.getParameterSegmentationOutputFile().setMask(mask); parameter.getParameterModel().setNumberOfComponents(oldNumberOfComponent); parameter.getParameterModel().setModelKind(oldModelKind); return clustersAdjClust; }
/**
 * Builds an initial segmentation with {@code MSegInit.make}, collapses it and returns it.
 *
 * <p>When the diarization parameter {@code saveAllStep} is set, the result is also written with
 * the output mask suffixed by {@code ".i.seg"}. The output mask is reset to its previous value
 * before returning.
 *
 * @param clusterSet the input cluster set
 * @param featureSet the feature set
 * @param parameter the shared parameters; only the output mask is temporarily modified
 * @return the collapsed initial cluster set
 * @throws DiarizationException the diarization exception
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws ParserConfigurationException the parser configuration exception
 * @throws SAXException the SAX exception
 * @throws TransformerException the transformer exception
 */
public ClusterSet sanityCheck(
    ClusterSet clusterSet, AudioFeatureSet featureSet, Parameter parameter)
    throws DiarizationException,
        IOException,
        ParserConfigurationException,
        SAXException,
        TransformerException {
  String savedMask = parameter.getParameterSegmentationOutputFile().getMask();

  ClusterSet initialized = new ClusterSet();
  MSegInit.make(featureSet, clusterSet, initialized, parameter);
  initialized.collapse();

  boolean saveIntermediate = parameter.getParameterDiarization().isSaveAllStep();
  if (saveIntermediate) {
    parameter.getParameterSegmentationOutputFile().setMask(savedMask + ".i.seg");
    MainTools.writeClusterSet(parameter, initialized, false);
  }

  // Reset unconditionally, exactly as when nothing was written.
  parameter.getParameterSegmentationOutputFile().setMask(savedMask);
  return initialized;
}
/** * Info. * * @param parameter the parameter * @param programName the program name * @throws IllegalArgumentException the illegal argument exception * @throws IllegalAccessException the illegal access exception * @throws InvocationTargetException the invocation target exception */ public static void info(Parameter parameter, String programName) throws IllegalArgumentException, IllegalAccessException, InvocationTargetException { if (parameter.help) { logger.config(parameter.getSeparator2()); logger.config("name = " + programName); logger.config(parameter.getSeparator()); parameter.logShow(); parameter.getParameterInputFeature().logAll(); // fInMask logger.config(parameter.getSeparator()); parameter.getParameterSegmentationInputFile().logAll(); // sInMask parameter.getParameterSegmentationInputFile2().logAll(); // sInMask parameter.getParameterSegmentationOutputFile().logAll(); // sOutMask logger.config(parameter.getSeparator()); parameter.getParameterDiarization().logAll(); logger.config(parameter.getSeparator()); } }
/** * Speaker clustering. * * @param threshold the threshold * @param method the method * @param clusterSetBase the cluster set base * @param clustersSet the clusters set * @param featureSet the feature set * @param parameter the parameter * @return the cluster set * @throws Exception the exception */ public ClusterSet speakerClustering( double threshold, String method, ClusterSet clusterSetBase, ClusterSet clustersSet, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String mask = parameter.getParameterSegmentationOutputFile().getMask(); String oldMethod = parameter.getParameterClustering().getMethodAsString(); double oldThreshold = parameter.getParameterClustering().getThreshold(); String oldEMControl = parameter.getParameterEM().getEMControl(); int oldNTop = parameter.getParameterTopGaussian().getScoreNTop(); String oldSpeechDetectorMethod = parameter.getParameterInputFeature().getSpeechMethodAsString(); double oldSpeechDetectorThreshold = parameter.getParameterInputFeature().getSpeechThreshold(); // ** bottom up hierarchical classification using GMMs // ** one for each cluster, trained by MAP adaptation of a UBM composed of the fusion of // 4x128GMM // ** the feature normalization use feature mapping technique, after the cluster frames are // centered and reduced String dir = "ester2"; InputStream ubmInputStream = getClass().getResourceAsStream(dir + "/ubm.gmm"); GMMArrayList ubmVect = MainTools.readGMMContainer(ubmInputStream, parameter.getParameterModel()); GMM ubm = ubmVect.get(0); // int nbCep = ubm.getDimension() + 1; String FeatureFormat = "featureSetTransformation"; parameter.getParameterInputFeature().setSpeechMethod("E"); parameter.getParameterInputFeature().setSpeechThreshold(0.1); AudioFeatureSet featureSet2 = loadFeature( featureSet, parameter, clustersSet, FeatureFormat + ",1:3:2:0:0:0,13,1:1:300:4"); parameter.getParameterClustering().setMethod(method); parameter.getParameterClustering().setThreshold(threshold); 
parameter.getParameterEM().setEMControl("1,5,0.01"); parameter.getParameterTopGaussian().setScoreNTop(5); // ---- Begin NEW v 1.13 --- // if (parameter.parameterSpeechDetector.useSpeechDetection() == true) { // MSpeechDetector.EnergyThresholdMethod(clustersSet, featureSet, parameter); // } // ---- End NEW v 1.13 --- boolean saveAll = parameter.getParameterDiarization().isSaveAllStep(); parameter.getParameterDiarization().setSaveAllStep(false); ClusterSet clustersCLR = MClust.make(featureSet2, clustersSet, parameter, ubm); parameter.getParameterDiarization().setSaveAllStep(saveAll); parameter.getParameterSegmentationOutputFile().setMask(mask); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".c.seg"); MainTools.writeClusterSet(parameter, clustersCLR, false); } parameter.getParameterSegmentationOutputFile().setMask(mask); parameter.getParameterClustering().setMethod(oldMethod); parameter.getParameterClustering().setThreshold(oldThreshold); parameter.getParameterEM().setEMControl(oldEMControl); parameter.getParameterTopGaussian().setScoreNTop(oldNTop); parameter.getParameterInputFeature().setSpeechMethod(oldSpeechDetectorMethod); parameter.getParameterInputFeature().setSpeechThreshold(oldSpeechDetectorThreshold); return clustersCLR; }
/** * Speech. * * @param threshold the threshold * @param clustersSetBase the clusters set base * @param clustersSegInit the clusters seg init * @param clustersDClust the clusters d clust * @param featureSet the feature set * @param parameter the parameter * @return the cluster set * @throws Exception the exception */ public ClusterSet speech( String threshold, ClusterSet clustersSetBase, ClusterSet clustersSegInit, ClusterSet clustersDClust, AudioFeatureSet featureSet, Parameter parameter) throws Exception { String mask = parameter.getParameterSegmentationOutputFile().getMask(); String oldDecoderPenalty = parameter.getParameterDecoder().getDecoderPenaltyAsString(); // ** Reload MFCC, remove energy and add delta String FeatureFormat = "featureSetTransformation"; AudioFeatureSet featureSet2 = loadFeature( featureSet, parameter, clustersSetBase, FeatureFormat + ",1:3:2:0:0:0,13,0:0:0:0"); String dir = "ester2"; // ** load the model : 8 GMMs with 64 diagonal components InputStream pmsInputStream = getClass().getResourceAsStream(dir + "/sms.gmms"); GMMArrayList pmsVect = MainTools.readGMMContainer(pmsInputStream, parameter.getParameterModel()); // ** set penalties for the i to j states // ** 10 for the first and second model corresponding to boad/narrowband silence // ** 50 for the other jingle speech (f0 f2 f3 fx), jingle and music parameter.getParameterDecoder().setDecoderPenalty(threshold); ClusterSet clustersPMSClust = MDecode.make(featureSet2, clustersSegInit, pmsVect, parameter); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".sms.seg"); MainTools.writeClusterSet(parameter, clustersPMSClust, false); } parameter.getParameterSegmentationOutputFile().setMask(mask); parameter.getParameterDecoder().setDecoderPenalty(oldDecoderPenalty); // ** Filter the segmentation adj acoording the sms segmentation // ** add 25 frames to all speech segments // ** remove silence part if silence segment is less 
than 25 frames // ** if a speech segment is less than 150 frames, it will be merge to the left or right closest // segments int oldSegmentPadding = parameter.getParameterFilter().getSegmentPadding(); int oldSilenceMinimumLength = parameter.getParameterFilter().getSilenceMinimumLength(); int oldSpeechMinimumLength = parameter.getParameterFilter().getSpeechMinimumLength(); String oldSegmentationFilterFile = parameter.getParameterSegmentationFilterFile().getClusterFilterName(); parameter.getParameterFilter().setSegmentPadding(25); parameter.getParameterFilter().setSilenceMinimumLength(25); parameter.getParameterFilter().setSpeechMinimumLength(150); ClusterSet clustersFltClust = SFilter.make(clustersDClust, clustersPMSClust, parameter); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".flt.seg"); MainTools.writeClusterSet(parameter, clustersFltClust, false); parameter.getParameterSegmentationOutputFile().setMask(mask); } // ** segments of more than 20s are split according of silence present in the pms or using a gmm // silence detector InputStream silenceInputStream = getClass().getResourceAsStream(dir + "/s.gmms"); GMMArrayList sVect = MainTools.readGMMContainer(silenceInputStream, parameter.getParameterModel()); parameter.getParameterSegmentationFilterFile().setClusterFilterName("iS,iT,j"); ClusterSet clustersSplitClust = SSplitSeg.make(featureSet2, clustersFltClust, sVect, clustersPMSClust, parameter); if (parameter.getParameterDiarization().isSaveAllStep()) { parameter.getParameterSegmentationOutputFile().setMask(mask + ".spl.seg"); MainTools.writeClusterSet(parameter, clustersSplitClust, false); parameter.getParameterSegmentationOutputFile().setMask(mask); } parameter.getParameterSegmentationFilterFile().setClusterFilterName(oldSegmentationFilterFile); parameter.getParameterFilter().setSegmentPadding(oldSegmentPadding); 
parameter.getParameterFilter().setSilenceMinimumLength(oldSilenceMinimumLength); parameter.getParameterFilter().setSpeechMinimumLength(oldSpeechMinimumLength); return clustersSplitClust; }