예제 #1
0
  /**
   * Tun ester2 speaker clr clustering.
   *
   * @param referenceClusterSet the reference cluster set
   * @param uemClusterSet the uem cluster set
   * @param partialKey the partial key
   * @param method the method
   * @param clusterSetBase the cluster set base
   * @param clusterSet the cluster set
   * @param featureSet the feature set
   * @param parameter the parameter
   * @return the diarization result list
   * @throws Exception the exception
   */
  public DiarizationResultList tunEster2SpeakerCLRClustering(
      ClusterSet referenceClusterSet,
      ClusterSet uemClusterSet,
      String partialKey,
      String method,
      ClusterSet clusterSetBase,
      ClusterSet clusterSet,
      AudioFeatureSet featureSet,
      Parameter parameter)
      throws Exception {
    String oldSpeechDetectorMethod = parameter.getParameterInputFeature().getSpeechMethodAsString();
    double oldSpeechDetectorThreshold = parameter.getParameterInputFeature().getSpeechThreshold();
    String oldModelKind = parameter.getParameterModel().getModelKindAsString();
    int oldNumberOfComponent = parameter.getParameterModel().getNumberOfComponents();
    String oldMethod = parameter.getParameterClustering().getMethodAsString();
    double oldThreshold = parameter.getParameterClustering().getThreshold();

    String oldEMControl = parameter.getParameterEM().getEMControl();
    int oldNTop = parameter.getParameterTopGaussian().getScoreNTop();
    boolean oldSaveAll = parameter.getParameterDiarization().isSaveAllStep();

    DiarizationResultList localResult = new DiarizationResultList(cMin, cMax, mult);
    DiarizationError computeError = new DiarizationError(referenceClusterSet, uemClusterSet);
    double prevScore = cMin;

    // ---- Begin NEW v 1.13 ---
    parameter.getParameterInputFeature().setSpeechMethod("E");
    parameter.getParameterInputFeature().setSpeechThreshold(0.1);

    // ---- End NEW v 1.13 ---
    String FeatureFormat = "featureSetTransformation";
    String dir = "ester2";
    InputStream ubmInputStream = getClass().getResourceAsStream(dir + "/ubm.gmm");
    GMMArrayList ubmVect =
        MainTools.readGMMContainer(ubmInputStream, parameter.getParameterModel());
    GMM ubm = ubmVect.get(0);
    /*
     * int nbCep = 16; logger.info("---> nbCep:"+nbCep); FeatureSet featureSet2 = loadFeature(parameter, clusterSet, "audio16kHz2sphinx,1:3:2:0:0:0,"+nbCep+",1:1:300:4"); logger.info("---> nbCep:"+nbCep); //logger.fine("*** nbFeaturesNorm:" +
     * clusterSetBase.getLength());
     */
    // A tester

    AudioFeatureSet featureSet2 =
        loadFeature(featureSet, parameter, clusterSet, FeatureFormat + ",1:3:2:0:0:0,13,1:1:300:4");
    // v5.14
    // FeatureSet featureSet2 = loadFeature(featureSet, parameter, clusterSet, FeatureFormat
    // + ",1:3:2:0:0:0,13,1:1:0:0");
    parameter.getParameterModel().setModelKind("DIAG");
    parameter.getParameterModel().setNumberOfComponents(ubm.getNbOfComponents());
    parameter.getParameterClustering().setMethod(method);
    // ---- Begin NEW v 1.19 ---
    // parameter.getParameterEM().setEMControl("1,1,0.01");
    // parameter.getParameterClustering().setThreshold(0);
    // ---- End NEW v 1.19 ---
    parameter.getParameterClustering().setThreshold(cMax);
    parameter.getParameterEM().setEMControl("1,5,0.01");
    parameter.getParameterTopGaussian().setScoreNTop(5);
    parameter.getParameterDiarization().setSaveAllStep(false);

    CLRHClustering clustering = new CLRHClustering(clusterSet, featureSet2, parameter, ubm);
    // int nbCluster = clusterSet.clusterGetSize();
    // logger.info("initialise clustering CLR clusterSet:"+clusterSet);
    clustering.initialize();

    double score = clustering.getScoreOfCandidatesForMerging();
    DiarizationResult error = computeError.scoreOfMatchedSpeakers(clustering.getClusterSet());
    double errorRate = error.getErrorRate();
    localResult.setResult(prevScore, score, error);
    // prevScore = Math.max(score, prevScore);
    logger.fine(
        "first "
            + parameter.show
            + " key="
            + partialKey
            + " clrScore="
            + score
            + " clrErrorRate="
            + errorRate
            + " clrSize="
            + clustering.getSize()
            + "/"
            + referenceClusterSet.clusterGetSize());
    while ((score < cMax) && (clustering.getSize() > 1)) {
      localResult.setResult(prevScore, score, error);
      prevScore = Math.max(score, prevScore);
      clustering.mergeCandidates();

      // -- start V5.16 --
      // logger.info("--> Decoding");
      // ClusterSet decodeClusterSet = MDecode.make(featureSet2, clustering.getClusterSet(),
      // clustering.getGmmList(), parameter);
      // logger.info("--> Clustering");
      // featureSet2 = loadFeature(featureSet, parameter, decodeClusterSet, FeatureFormat
      // + ",1:3:2:0:0:0,13,1:1:300:4");
      // clustering = new CLRHClustering(decodeClusterSet, featureSet2, parameter, ubm);
      // clustering.initialize();
      // -- end V5.16 --

      score = clustering.getScoreOfCandidatesForMerging();
      error = computeError.scoreOfMatchedSpeakers(clustering.getClusterSet());
      errorRate = error.getErrorRate();
      // localResult.setResult(prevScore, score, error);
      // prevScore = Math.max(score, prevScore);
      logger.fine(
          parameter.show
              + " key="
              + partialKey
              + " clrScore="
              + score
              + " clrErrorRate="
              + errorRate
              + " clrSize="
              + clustering.getSize()
              + "/"
              + referenceClusterSet.clusterGetSize());
    }
    localResult.setResult(prevScore, score, error);
    localResult.setResult(score, cMax, error);
    logger.finer(parameter.show + " key=" + partialKey + " resultat du fichier");
    localResult.log("partial result: " + parameter.show + " " + partialKey);

    clustering.reset();

    parameter.getParameterModel().setNumberOfComponents(oldNumberOfComponent);
    parameter.getParameterModel().setModelKind(oldModelKind);
    parameter.getParameterClustering().setMethod(oldMethod);
    parameter.getParameterClustering().setThreshold(oldThreshold);
    parameter.getParameterEM().setEMControl(oldEMControl);
    parameter.getParameterTopGaussian().setScoreNTop(oldNTop);
    parameter.getParameterInputFeature().setSpeechMethod(oldSpeechDetectorMethod);
    parameter.getParameterInputFeature().setSpeechThreshold(oldSpeechDetectorThreshold);
    parameter.getParameterDiarization().setSaveAllStep(oldSaveAll);

    return localResult;
  }
예제 #2
0
  /**
   * Speaker clustering.
   *
   * @param threshold the threshold
   * @param method the method
   * @param clusterSetBase the cluster set base
   * @param clustersSet the clusters set
   * @param featureSet the feature set
   * @param parameter the parameter
   * @return the cluster set
   * @throws Exception the exception
   */
  public ClusterSet speakerClustering(
      double threshold,
      String method,
      ClusterSet clusterSetBase,
      ClusterSet clustersSet,
      AudioFeatureSet featureSet,
      Parameter parameter)
      throws Exception {
    String mask = parameter.getParameterSegmentationOutputFile().getMask();
    String oldMethod = parameter.getParameterClustering().getMethodAsString();
    double oldThreshold = parameter.getParameterClustering().getThreshold();
    String oldEMControl = parameter.getParameterEM().getEMControl();
    int oldNTop = parameter.getParameterTopGaussian().getScoreNTop();
    String oldSpeechDetectorMethod = parameter.getParameterInputFeature().getSpeechMethodAsString();
    double oldSpeechDetectorThreshold = parameter.getParameterInputFeature().getSpeechThreshold();

    // ** bottom up hierarchical classification using GMMs
    // ** one for each cluster, trained by MAP adaptation of a UBM composed of the fusion of
    // 4x128GMM
    // ** the feature normalization use feature mapping technique, after the cluster frames are
    // centered and reduced
    String dir = "ester2";
    InputStream ubmInputStream = getClass().getResourceAsStream(dir + "/ubm.gmm");
    GMMArrayList ubmVect =
        MainTools.readGMMContainer(ubmInputStream, parameter.getParameterModel());
    GMM ubm = ubmVect.get(0);
    // int nbCep = ubm.getDimension() + 1;
    String FeatureFormat = "featureSetTransformation";

    parameter.getParameterInputFeature().setSpeechMethod("E");
    parameter.getParameterInputFeature().setSpeechThreshold(0.1);

    AudioFeatureSet featureSet2 =
        loadFeature(
            featureSet, parameter, clustersSet, FeatureFormat + ",1:3:2:0:0:0,13,1:1:300:4");
    parameter.getParameterClustering().setMethod(method);
    parameter.getParameterClustering().setThreshold(threshold);
    parameter.getParameterEM().setEMControl("1,5,0.01");
    parameter.getParameterTopGaussian().setScoreNTop(5);
    // ---- Begin NEW v 1.13 ---

    // if (parameter.parameterSpeechDetector.useSpeechDetection() == true) {
    // MSpeechDetector.EnergyThresholdMethod(clustersSet, featureSet, parameter);
    // }
    // ---- End NEW v 1.13 ---

    boolean saveAll = parameter.getParameterDiarization().isSaveAllStep();
    parameter.getParameterDiarization().setSaveAllStep(false);
    ClusterSet clustersCLR = MClust.make(featureSet2, clustersSet, parameter, ubm);
    parameter.getParameterDiarization().setSaveAllStep(saveAll);

    parameter.getParameterSegmentationOutputFile().setMask(mask);
    if (parameter.getParameterDiarization().isSaveAllStep()) {
      parameter.getParameterSegmentationOutputFile().setMask(mask + ".c.seg");
      MainTools.writeClusterSet(parameter, clustersCLR, false);
    }
    parameter.getParameterSegmentationOutputFile().setMask(mask);
    parameter.getParameterClustering().setMethod(oldMethod);
    parameter.getParameterClustering().setThreshold(oldThreshold);
    parameter.getParameterEM().setEMControl(oldEMControl);
    parameter.getParameterTopGaussian().setScoreNTop(oldNTop);
    parameter.getParameterInputFeature().setSpeechMethod(oldSpeechDetectorMethod);
    parameter.getParameterInputFeature().setSpeechThreshold(oldSpeechDetectorThreshold);

    return clustersCLR;
  }